/* $Id: Seq_id.cpp 666463 2023-04-26 15:21:25Z grichenk $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  .......
 *
 * File Description:
 *   .......
 *
 * Remark:
 *   This code was originally generated by application DATATOOL
 *   using specifications from the ASN data definition file
 *   'seqloc.asn'.
 */

// standard includes

// generated includes
#include <ncbi_pch.hpp>
#include <corelib/ncbiutil.hpp>
#include <corelib/ncbi_param.hpp>
#include <util/line_reader.hpp>
#include <util/static_map.hpp>
#include <util/util_misc.hpp>
#include <util/bitset/ncbi_bitset.hpp>
#include <serial/serialimpl.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seq_inst.hpp>
#include <objects/seq/seq_id_handle.hpp>

#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqloc/Textseq_id.hpp>
#include <objects/seqloc/Giimport_id.hpp>
#include <objects/seqloc/Patent_seq_id.hpp>
#include <objects/seqloc/PDB_seq_id.hpp>

#include <objects/biblio/Id_pat.hpp>

#include <objects/general/Object_id.hpp>
#include <objects/general/Dbtag.hpp>
#include <objects/general/Date.hpp>
#include <objects/general/Date_std.hpp>
#include <objects/misc/error_codes.hpp>
#include <objects/seqloc/Patent_seq_id.hpp>
#include <objects/biblio/Id_pat.hpp>
#include <objects/seqloc/PDB_seq_id.hpp>
#include <corelib/ncbistre.hpp>

#include "accguide2.inc"


#define NCBI_USE_ERRCODE_X   Objects_SeqId
namespace
{
/// Character predicate: returns true for characters that must be rejected.
///
/// A character is rejected when its code is below 32 or above 127, or when
/// it occurs in the caller-supplied list kSymbols.
struct CSeq_id_find_pred
{
    /// NUL-terminated list of additional characters to reject.
    const char* kSymbols;

    /// @param ch  character to test
    /// @return    true if ch is out of range or listed in kSymbols
    bool operator()(const char ch) const
    {
        // Test the code as unsigned so the range check reads the same
        // whether plain char is signed or unsigned on the target platform
        // (with a signed char, "ch > 127" can never be true).
        const unsigned char code = static_cast<unsigned char>(ch);
        if (code < 32 || code > 127) // non-ASCII is not allowed
            return true;
        return strchr(kSymbols, ch) != 0;
    }
};

}


// generated classes

BEGIN_NCBI_SCOPE
BEGIN_objects_SCOPE // namespace ncbi::objects::

// Database names collected into the static string set kSupportedRawDbtags
// defined right after the array.
// NOTE(review): the entries are in case-sensitive (ASCII) order, with the
// all-uppercase names preceding "dbGSS"/"dbSTS"; presumably the static set
// needs that ordering, as the other static array maps in this file do --
// confirm before inserting a name out of order.
static const char* sc_SupportedRawDbtags[] = {
    "ATGC",
    "BCMHGSC",
    "BERKELEY",
    "CELERA",
    "GSDB",
    "HOOD",
    "LANLCHGS",
    "LRG",
    "MIPS",
    "NCBI_EXT_ACC",
    "NCBI_GENOMES",
    "NCBI_MITO",
    "PGEC",
    "PID",
    "SGD",
    "SHGC",
    "SRA",
    "TIGR",
    "UOKNOR",
    "UWGC",
    "WASHU",
    "WIBR",
    "WUGSC",
    "dbGSS",
    "dbSTS"
};
// Builds kSupportedRawDbtags (a CStaticArraySet<string>) from the array above.
DEFINE_STATIC_ARRAY_MAP_WITH_COPY(CStaticArraySet<string>, kSupportedRawDbtags,
                                  sc_SupportedRawDbtags);


// CSeqIdException
// Return the symbolic name of this exception's error code.  Codes that are
// not specific to CSeqIdException are resolved by the CException base class.
const char* CSeqIdException::GetErrCodeString(void) const
{
    const auto code = GetErrCode();
    if (code == eUnknownType) {
        return "eUnknownType";
    }
    if (code == eFormat) {
        return "eFormat";
    }
    return CException::GetErrCodeString();
}


// Default constructor: does no work of its own.
CSeq_id::CSeq_id(void)
{
}

// Destructor: does no work of its own.
CSeq_id::~CSeq_id(void)
{
}


static void s_SplitVersion(const CTempString& acc_in, CTempString& acc,
                           int& ver)
{
    CTempString verstr;
    NStr::SplitInTwo(acc_in, ".", acc, verstr);
    if (verstr.empty()) {
        ver = 0;
    } else {
        ver = NStr::StringToNonNegativeInt(verstr);
        if (ver <= 0) {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Version embedded in accession " + string(acc_in)
                       + " is not a positive integer");
        }
    }
}


// Return a pointer to the CTextseq_id held by this Seq-id when the selected
// variant is one of the Textseq-id based choices, or a null pointer for any
// other variant.
const CTextseq_id* CSeq_id::GetTextseq_Id(void) const
{
    const CTextseq_id* text_id = 0;
    switch ( Which() ) {
    case e_Genbank:            text_id = &GetGenbank();            break;
    case e_Embl:               text_id = &GetEmbl();               break;
    case e_Ddbj:               text_id = &GetDdbj();               break;
    case e_Pir:                text_id = &GetPir();                break;
    case e_Swissprot:          text_id = &GetSwissprot();          break;
    case e_Other:              text_id = &GetOther();              break;
    case e_Prf:                text_id = &GetPrf();                break;
    case e_Tpg:                text_id = &GetTpg();                break;
    case e_Tpe:                text_id = &GetTpe();                break;
    case e_Tpd:                text_id = &GetTpd();                break;
    case e_Gpipe:              text_id = &GetGpipe();              break;
    case e_Named_annot_track:  text_id = &GetNamed_annot_track();  break;
    default:
        // Not a Textseq-id based variant; leave the result null.
        break;
    }
    return text_id;
}


// Copy the selected variant of src (unset, integer id, or string) into dst.
// Throws CSeqIdException (eFormat) for any other variant value.
inline
void x_Assign(CObject_id& dst, const CObject_id& src)
{
    const CObject_id::E_Choice variant = src.Which();
    if ( variant == CObject_id::e_not_set ) {
        dst.Reset();
    }
    else if ( variant == CObject_id::e_Id ) {
        dst.SetId(src.GetId());
    }
    else if ( variant == CObject_id::e_Str ) {
        dst.SetStr(src.GetStr());
    }
    else {
        NCBI_THROW(CSeqIdException, eFormat, "invalid Object-id variant");
    }
}


// Copy a CGiimport_id field by field: the id is always copied, while the
// optional db and release fields are either copied or cleared in dst.
inline
void x_Assign(CGiimport_id& dst, const CGiimport_id& src)
{
    dst.SetId(src.GetId());

    // db
    if ( !src.IsSetDb() ) {
        dst.ResetDb();
    }
    else {
        dst.SetDb(src.GetDb());
    }

    // release
    if ( !src.IsSetRelease() ) {
        dst.ResetRelease();
    }
    else {
        dst.SetRelease(src.GetRelease());
    }
}


// Copy a CTextseq_id field by field.  Each of name, accession, release and
// version is copied when set in src and cleared in dst otherwise.
inline
void x_Assign(CTextseq_id& dst, const CTextseq_id& src)
{
    // name
    if ( !src.IsSetName() ) {
        dst.ResetName();
    }
    else {
        dst.SetName(src.GetName());
    }

    // accession
    if ( !src.IsSetAccession() ) {
        dst.ResetAccession();
    }
    else {
        dst.SetAccession(src.GetAccession());
    }

    // release
    if ( !src.IsSetRelease() ) {
        dst.ResetRelease();
    }
    else {
        dst.SetRelease(src.GetRelease());
    }

    // version
    if ( !src.IsSetVersion() ) {
        dst.ResetVersion();
    }
    else {
        dst.SetVersion(src.GetVersion());
    }
}


// Copy a CDbtag: the database name first, then the tag through the
// CObject_id overload of x_Assign.
inline
void x_Assign(CDbtag& dst, const CDbtag& src)
{
    const auto& db_name = src.GetDb();
    dst.SetDb(db_name);

    CObject_id&       tag_out = dst.SetTag();
    const CObject_id& tag_in  = src.GetTag();
    x_Assign(tag_out, tag_in);
}


// Copy a CPatent_seq_id: the sequence number is set directly and the
// citation is copied with its own Assign().
inline
void x_Assign(CPatent_seq_id& dst, const CPatent_seq_id& src)
{
    const auto& seq_number = src.GetSeqid();
    dst.SetSeqid(seq_number);

    const auto& citation = src.GetCit();
    dst.SetCit().Assign(citation);
}


// CDate overload: delegates entirely to CDate::Assign so that dates can be
// copied through the same x_Assign interface as the other types.
inline
void x_Assign(CDate& dst, const CDate& src)
{
    const CDate& from = src;
    dst.Assign(from);
}


// Copy a CPDB_seq_id field by field.  The molecule id is always copied;
// chain, chain-id and release date are copied when set in src and cleared
// in dst otherwise.
inline
void x_Assign(CPDB_seq_id& dst, const CPDB_seq_id& src)
{
    dst.SetMol().Set(src.GetMol());

    // chain
    if ( !src.IsSetChain() ) {
        dst.ResetChain();
    }
    else {
        dst.SetChain(src.GetChain());
    }

    // chain-id
    if ( !src.IsSetChain_id() ) {
        dst.ResetChain_id();
    }
    else {
        dst.SetChain_id(src.GetChain_id());
    }

    // release date
    if ( !src.IsSetRel() ) {
        dst.ResetRel();
    }
    else {
        dst.SetRel().Assign(src.GetRel());
    }
}


void CSeq_id::Assign(const CSerialObject& obj, ESerialRecursionMode how)
{
    if ( GetTypeInfo() == obj.GetThisTypeInfo() ) {
        const CSeq_id& id = static_cast<const CSeq_id&>(obj);
        switch ( id.Which() ) {
        case e_not_set:
            Reset();
            return;
        case e_Local:
            x_Assign(SetLocal(), id.GetLocal());
            return;
        case e_Gibbsq:
            SetGibbsq(id.GetGibbsq());
            return;
        case e_Gibbmt:
            SetGibbmt(id.GetGibbmt());
            return;
        case e_Giim:
            x_Assign(SetGiim(), id.GetGiim());
            return;
        case e_Pir:
            x_Assign(SetPir(), id.GetPir());
            return;
        case e_Swissprot:
            x_Assign(SetSwissprot(), id.GetSwissprot());
            return;
        case e_Patent:
            x_Assign(SetPatent(), id.GetPatent());
            return;
        case e_Other:
            x_Assign(SetOther(), id.GetOther());
            return;
        case e_General:
            x_Assign(SetGeneral(), id.GetGeneral());
            return;
        case e_Gi:
            SetGi(id.GetGi());
            return;
        case e_Prf:
            x_Assign(SetPrf(), id.GetPrf());
            return;
        case e_Pdb:
            x_Assign(SetPdb(), id.GetPdb());
            return;
        case e_Genbank:
            x_Assign(SetGenbank(), id.GetGenbank());
            return;
        case e_Embl:
            x_Assign(SetEmbl(), id.GetEmbl());
            return;
        case e_Ddbj:
            x_Assign(SetDdbj(), id.GetDdbj());
            return;
        case e_Tpg:
            x_Assign(SetTpg(), id.GetTpg());
            return;
        case e_Tpe:
            x_Assign(SetTpe(), id.GetTpe());
            return;
        case e_Tpd:
            x_Assign(SetTpd(), id.GetTpd());
            return;
        case e_Gpipe:
            x_Assign(SetGpipe(), id.GetGpipe());
            return;
        case e_Named_annot_track:
            x_Assign(SetNamed_annot_track(), id.GetNamed_annot_track());
            return;
        }
    }
    CSerialObject::Assign(obj, how);
}


// True for the Seq-id variants that CSeq_id::Compare allows to be matched
// against one another by Textseq-id even when their variants differ.
inline bool CanCmpAcc(CSeq_id::E_Choice choice)
{
    return choice == CSeq_id::e_Genbank
        || choice == CSeq_id::e_Embl
        || choice == CSeq_id::e_Ddbj
        || choice == CSeq_id::e_Tpg
        || choice == CSeq_id::e_Tpe
        || choice == CSeq_id::e_Tpd
        || choice == CSeq_id::e_Gpipe
        || choice == CSeq_id::e_Named_annot_track;
}


// Compare() - are SeqIds equivalent?
//
// Returns e_YES when the two ids match, e_NO when they are comparable but do
// not match, e_DIFF when they cannot be compared (different, incompatible
// variants, or General ids whose database names differ), and e_error for a
// variant that is not handled here.
CSeq_id::E_SIC CSeq_id::Compare(const CSeq_id& sid2) const
{
    const E_Choice mine   = Which();
    const E_Choice theirs = sid2.Which();

    if ( mine != theirs ) {
        // Different variants are only comparable when both sides are among
        // the variants accepted by CanCmpAcc.
        if ( !(CanCmpAcc(mine)  &&  CanCmpAcc(theirs)) ) {
            return e_DIFF;
        }
        const CTextseq_id* lhs = GetTextseq_Id();
        if ( !lhs ) {
            return e_DIFF;
        }
        const CTextseq_id* rhs = sid2.GetTextseq_Id();
        if ( !rhs ) {
            return e_DIFF;
        }
        return lhs->Match(*rhs) ? e_YES : e_NO; // id Textseq_id match
    }

    // Same variant on both sides.  Every Textseq-id based variant is matched
    // the same way, so handle them all through GetTextseq_Id().
    if ( const CTextseq_id* lhs = GetTextseq_Id() ) {
        return lhs->Match(*sid2.GetTextseq_Id()) ? e_YES : e_NO;
    }

    switch ( mine ) {
    case e_Local:
        return GetLocal().Match(sid2.GetLocal()) ? e_YES : e_NO;
    case e_Gibbsq:
        return GetGibbsq() == sid2.GetGibbsq() ? e_YES : e_NO;
    case e_Gibbmt:
        return GetGibbmt() == sid2.GetGibbmt() ? e_YES : e_NO;
    case e_Giim:
        return GetGiim().GetId() == sid2.GetGiim().GetId() ? e_YES : e_NO;
    case e_Patent:
        return GetPatent().Match(sid2.GetPatent()) ? e_YES : e_NO;
    case e_General:
    {
        const CDbtag& lhs_tag = GetGeneral();
        const CDbtag& rhs_tag = sid2.GetGeneral();
        if ( lhs_tag.Match(rhs_tag) ) {
            return e_YES;
        }
        // Tags from different databases are not comparable at all.
        const bool db_differs =
            NStr::CompareNocase(lhs_tag.GetDb(), rhs_tag.GetDb()) != 0;
        return db_differs ? e_DIFF : e_NO;
    }
    case e_Gi:
        return GetGi() == sid2.GetGi() ? e_YES : e_NO;
    case e_Pdb:
        return GetPdb().Match(sid2.GetPdb()) ? e_YES : e_NO;
    default:
        return e_error;
    }
}


// Ordering comparison of two Seq-ids.
//
// Returns a negative value when *this sorts before sid2, zero when the two
// compare equal, and a positive value when *this sorts after sid2.
// Ids of different variants are ordered by variant; ids of the same variant
// are ordered by their content.  Variants without a content comparison here
// (for example an unset id) compare equal.
int CSeq_id::CompareOrdered(const CSeq_id& sid2) const
{
    int ret = Which() - sid2.Which();
    if ( ret != 0 ) {
        return ret;
    }
    const CTextseq_id *tsip1 = GetTextseq_Id();
    const CTextseq_id *tsip2 = sid2.GetTextseq_Id();
    if ( tsip1 && tsip2 ) {
        return tsip1->Compare(*tsip2);
    }

    // Three-way comparison of two integers.  Returning "lhs - rhs" instead
    // would overflow (undefined behavior) for operands of opposite sign and
    // large magnitude, and could then report the wrong order.
    const auto three_way = [](int lhs, int rhs) -> int {
        return lhs < rhs ? -1 : (lhs > rhs ? 1 : 0);
    };

    switch ( Which() ) { // Now we only need to know one
    case e_Local:
        return GetLocal().Compare(sid2.GetLocal());
    case e_Gibbsq:
        return three_way(GetGibbsq(), sid2.GetGibbsq());
    case e_Gibbmt:
        return three_way(GetGibbmt(), sid2.GetGibbmt());
    case e_Giim:
        return three_way(GetGiim().GetId(), sid2.GetGiim().GetId());
    case e_Patent:
        return GetPatent().Compare(sid2.GetPatent());
    case e_General:
        return GetGeneral().Compare(sid2.GetGeneral());
    case e_Gi:
        // Uses only < and > on the GI type, as the original code did.
        if ( GetGi() < sid2.GetGi() ) {
            return -1;
        }
        else {
            return GetGi() > sid2.GetGi();
        }
    case e_Pdb:
        return GetPdb().Compare(sid2.GetPdb());
    default:
        return 0;
    }
}

// One (tag text, Seq-id variant) entry of the tag lookup table below.
typedef SStaticPair<const char*, CSeq_id::E_Choice> TChoiceMapEntry;
// Lookup table from textual tag to Seq-id variant, consulted by
// CSeq_id::WhichInverseSeqId through sc_ChoiceMap.
// Used for binary searching; entries must stay in sorted order.  The map's
// comparator is PNocase_Generic<CTempString>, so keep the order valid for
// that comparison when adding entries.
// Several variants have more than one tag (e.g. "pat"/"pgp", "sp"/"tr").
static const TChoiceMapEntry sc_ChoiceArray[] = {
    { "???",          CSeq_id::e_not_set },
    { "bbm",          CSeq_id::e_Gibbmt },
    { "bbs",          CSeq_id::e_Gibbsq },
    { "dbj",          CSeq_id::e_Ddbj },
// removed aliases, see s_HasFastaTag and x_Init
//    { "ddbj",         CSeq_id::e_Ddbj },
    { "emb",          CSeq_id::e_Embl },
//    { "embl",         CSeq_id::e_Embl },
    { "gb",           CSeq_id::e_Genbank },
//    { "genbank",      CSeq_id::e_Genbank },
//    { "general",      CSeq_id::e_General },
    { "gi",           CSeq_id::e_Gi },
//    { "gibbmt",       CSeq_id::e_Gibbmt },
    { "gibbsq",       CSeq_id::e_Gibbsq },
//    { "giim",         CSeq_id::e_Giim },
    { "gim",          CSeq_id::e_Giim },
    { "gnl",          CSeq_id::e_General },
//    { "gpipe",        CSeq_id::e_Gpipe },
    { "gpp",          CSeq_id::e_Gpipe },
    { "lcl",          CSeq_id::e_Local },
//    { "local",        CSeq_id::e_Local },
//    { "named_annot_track", CSeq_id::e_Named_annot_track },
    { "nat",          CSeq_id::e_Named_annot_track },
    { "not_set",      CSeq_id::e_not_set },
//    { "oth",          CSeq_id::e_Other }, // deprecated vs. ref
//    { "other",        CSeq_id::e_Other },
    { "pat",          CSeq_id::e_Patent },
//    { "patent",       CSeq_id::e_Patent },
    { "pdb",          CSeq_id::e_Pdb },
    { "pgp",          CSeq_id::e_Patent },
    { "pir",          CSeq_id::e_Pir },
    { "prf",          CSeq_id::e_Prf },
    { "ref",          CSeq_id::e_Other },
    { "sp",           CSeq_id::e_Swissprot },
//    { "swissprot",    CSeq_id::e_Swissprot },
    { "tpd",          CSeq_id::e_Tpd },
    { "tpe",          CSeq_id::e_Tpe },
    { "tpg",          CSeq_id::e_Tpg },
    { "tr",           CSeq_id::e_Swissprot }
};
// Static map type over the table above, keyed by CTempString and compared
// with PNocase_Generic<CTempString>.
typedef CStaticPairArrayMap<CTempString, CSeq_id::E_Choice,
                            PNocase_Generic<CTempString> > TChoiceMap;
// Builds sc_ChoiceMap from sc_ChoiceArray.
DEFINE_STATIC_ARRAY_MAP_WITH_COPY(TChoiceMap, sc_ChoiceMap, sc_ChoiceArray);


// FASTA tag text for each Seq-id variant.  The table is positional:
// CSeq_id::WhichFastaTag indexes it directly with the E_Choice value, so the
// entries must stay in E_Choice order and the array must keep
// CSeq_id::e_MaxChoice+1 slots (the last one is an empty placeholder).
static const char* const s_TextId[CSeq_id::e_MaxChoice+1] =
{   // FASTA_LONG formats
    "???" , // not-set = ???
    "lcl",  // local = lcl|integer or string
    "bbs",  // gibbsq = bbs|integer
    "bbm",  // gibbmt = bbm|integer
    "gim",  // giim = gim|integer
    "gb",   // genbank = gb|accession|locus
    "emb",  // embl = emb|accession|locus
    "pir",  // pir = pir|accession|name
    "sp",   // swissprot = sp|accession|name *OR* tr|accession|name
    "pat",  // patent = pat|country|patent number (string)|seq number (integer)
            //     *OR* pgp|country|application number|seq number
    "ref",  // other = ref|accession|name|release - changed from oth to ref
    "gnl",  // general = gnl|database(string)|id (string or number)
    "gi",   // gi = gi|integer
    "dbj",  // ddbj = dbj|accession|locus
    "prf",  // prf = prf|accession|name
    "pdb",  // pdb = pdb|entry name (string)|chain id (char)
    "tpg",  // tpg = tpg|accession|name
    "tpe",  // tpe = tpe|accession|name
    "tpd",  // tpd = tpd|accession|name
    "gpp",  // gpipe = gpp|accession|name
    "nat",  // named_annot_track = nat|accession|name
    ""  // Placeholder for end of list
};

// Map a textual tag (e.g. "gb", "ref") to the corresponding Seq-id variant
// by looking it up in sc_ChoiceMap.  Returns e_not_set for an unknown tag.
CSeq_id::E_Choice CSeq_id::WhichInverseSeqId(const CTempString& SeqIdCode)
{
    const TChoiceMap::const_iterator hit = sc_ChoiceMap.find(SeqIdCode);
    return hit != sc_ChoiceMap.end() ? hit->second : e_not_set;
}

// Return the FASTA tag text stored in s_TextId for the given variant, or
// kEmptyCStr when the value lies outside the table.
const char* CSeq_id::WhichFastaTag(E_Choice choice)
{
    // Reject anything that would index outside s_TextId.
    if ( !(choice >= 0  &&  choice < ArraySize(s_TextId)) ) {
        return kEmptyCStr;
    }
    return s_TextId[choice];
}

// True when s looks like it starts with a two- or three-character tag
// followed by '|' and at least one more character.
static inline bool s_HasFastaTag(const CTempString& s)
{
    // Strictly-greater size checks: there should be content after the bar.
    const bool two_char_tag   = s.size() > 3  &&  s[2] == '|';
    const bool three_char_tag = !two_char_tag  &&  s.size() > 4
                                &&  s[3] == '|';
    return two_char_tag  ||  three_char_tag;
}



// If s starts with a two- or three-character tag followed by '|' and more
// text, return the variant CSeq_id::WhichInverseSeqId reports for that tag;
// otherwise return CSeq_id::e_not_set.
static CSeq_id::E_Choice s_CheckForFastaTag(const CTempString& s)
{
    // Determine the tag length first.  The size checks are strictly greater
    // because there should be content after the bar.
    size_t tag_len = 0;
    if (s.size() > 3  &&  s[2] == '|') {
        tag_len = 2;
    } else if (s.size() > 4  &&  s[3] == '|') {
        tag_len = 3;
    }

    if (tag_len == 0) {
        return CSeq_id::e_not_set;
    }
    return CSeq_id::WhichInverseSeqId(s.substr(0, tag_len));
}


// One (name, accession-info value) entry of the lookup table below.
typedef SStaticPair<const char*, CSeq_id::EAccessionInfo> TAccInfoMapEntry;
// Lookup table from a textual accession-type name to the matching
// CSeq_id::EAccessionInfo value (each name is the enumerator name without
// its "eAcc_" prefix).
// Used for binary searching; entries must stay in sorted order.  The map's
// comparator is PNocase_Generic<CTempString>, so keep the order valid for
// that comparison when adding entries.
static const TAccInfoMapEntry sc_AccInfoArray[] = {
    { "ambiguous_nuc",           CSeq_id::eAcc_ambiguous_nuc },
    { "ddbj_con",                CSeq_id::eAcc_ddbj_con },
    { "ddbj_dirsub",             CSeq_id::eAcc_ddbj_dirsub },
    { "ddbj_est",                CSeq_id::eAcc_ddbj_est },
    { "ddbj_genome",             CSeq_id::eAcc_ddbj_genome },
    { "ddbj_gss",                CSeq_id::eAcc_ddbj_gss },
    { "ddbj_htgs",               CSeq_id::eAcc_ddbj_htgs },
    { "ddbj_mga",                CSeq_id::eAcc_ddbj_mga },
    { "ddbj_mrna",               CSeq_id::eAcc_ddbj_mrna },
    { "ddbj_other",              CSeq_id::eAcc_ddbj_other },
    { "ddbj_other_nuc",          CSeq_id::eAcc_ddbj_other_nuc },
    { "ddbj_patent",             CSeq_id::eAcc_ddbj_patent },
    { "ddbj_prot",               CSeq_id::eAcc_ddbj_prot },
    { "ddbj_targeted_nuc",       CSeq_id::eAcc_ddbj_targeted_nuc },
    { "ddbj_targetedm_nuc",      CSeq_id::eAcc_ddbj_targetedm_nuc },
    { "ddbj_targetedv_nuc",      CSeq_id::eAcc_ddbj_targetedv_nuc },
    { "ddbj_targetedvm_nuc",     CSeq_id::eAcc_ddbj_targetedvm_nuc },
    { "ddbj_tpa_chromosome",     CSeq_id::eAcc_ddbj_tpa_chromosome },
    { "ddbj_tpa_con",            CSeq_id::eAcc_ddbj_tpa_con },
    { "ddbj_tpa_nuc",            CSeq_id::eAcc_ddbj_tpa_nuc },
    { "ddbj_tpa_other",          CSeq_id::eAcc_ddbj_tpa_other },
    { "ddbj_tpa_prot",           CSeq_id::eAcc_ddbj_tpa_prot },
    { "ddbj_tpa_targeted_nuc",   CSeq_id::eAcc_ddbj_tpa_targeted_nuc },
    { "ddbj_tpa_targetedm_nuc",  CSeq_id::eAcc_ddbj_tpa_targetedm_nuc },
    { "ddbj_tpa_targetedv_nuc",  CSeq_id::eAcc_ddbj_tpa_targetedv_nuc },
    { "ddbj_tpa_targetedvm_nuc", CSeq_id::eAcc_ddbj_tpa_targetedvm_nuc },
    { "ddbj_tpa_tsa_nuc",        CSeq_id::eAcc_ddbj_tpa_tsa_nuc },
    { "ddbj_tpa_tsa_prot",       CSeq_id::eAcc_ddbj_tpa_tsa_prot },
    { "ddbj_tpa_tsam_nuc",       CSeq_id::eAcc_ddbj_tpa_tsam_nuc },
    { "ddbj_tpa_tsam_prot",      CSeq_id::eAcc_ddbj_tpa_tsam_prot },
    { "ddbj_tpa_tsav_nuc",       CSeq_id::eAcc_ddbj_tpa_tsav_nuc },
    { "ddbj_tpa_tsav_prot",      CSeq_id::eAcc_ddbj_tpa_tsav_prot },
    { "ddbj_tpa_tsavm_nuc",      CSeq_id::eAcc_ddbj_tpa_tsavm_nuc },
    { "ddbj_tpa_tsavm_prot",     CSeq_id::eAcc_ddbj_tpa_tsavm_prot },
    { "ddbj_tpa_wgs_nuc",        CSeq_id::eAcc_ddbj_tpa_wgs_nuc },
    { "ddbj_tpa_wgs_prot",       CSeq_id::eAcc_ddbj_tpa_wgs_prot },
    { "ddbj_tpa_wgsm_nuc",       CSeq_id::eAcc_ddbj_tpa_wgsm_nuc },
    { "ddbj_tpa_wgsm_prot",      CSeq_id::eAcc_ddbj_tpa_wgsm_prot },
    { "ddbj_tpa_wgsv_nuc",       CSeq_id::eAcc_ddbj_tpa_wgsv_nuc },
    { "ddbj_tpa_wgsv_prot",      CSeq_id::eAcc_ddbj_tpa_wgsv_prot },
    { "ddbj_tpa_wgsvm_nuc",      CSeq_id::eAcc_ddbj_tpa_wgsvm_nuc },
    { "ddbj_tpa_wgsvm_prot",     CSeq_id::eAcc_ddbj_tpa_wgsvm_prot },
    { "ddbj_tsa_nuc",            CSeq_id::eAcc_ddbj_tsa_nuc },
    { "ddbj_tsa_prot",           CSeq_id::eAcc_ddbj_tsa_prot },
    { "ddbj_tsam_nuc",           CSeq_id::eAcc_ddbj_tsam_nuc },
    { "ddbj_tsam_prot",          CSeq_id::eAcc_ddbj_tsam_prot },
    { "ddbj_tsav_nuc",           CSeq_id::eAcc_ddbj_tsav_nuc },
    { "ddbj_tsav_prot",          CSeq_id::eAcc_ddbj_tsav_prot },
    { "ddbj_tsavm_nuc",          CSeq_id::eAcc_ddbj_tsavm_nuc },
    { "ddbj_tsavm_prot",         CSeq_id::eAcc_ddbj_tsavm_prot },
    { "ddbj_wgs_nuc",            CSeq_id::eAcc_ddbj_wgs_nuc },
    { "ddbj_wgs_prot",           CSeq_id::eAcc_ddbj_wgs_prot },
    { "ddbj_wgsm_nuc",           CSeq_id::eAcc_ddbj_wgsm_nuc },
    { "ddbj_wgsm_prot",          CSeq_id::eAcc_ddbj_wgsm_prot },
    { "ddbj_wgsv_nuc",           CSeq_id::eAcc_ddbj_wgsv_nuc },
    { "ddbj_wgsv_prot",          CSeq_id::eAcc_ddbj_wgsv_prot },
    { "ddbj_wgsvm_nuc",          CSeq_id::eAcc_ddbj_wgsvm_nuc },
    { "ddbj_wgsvm_prot",         CSeq_id::eAcc_ddbj_wgsvm_prot },
    { "embl_con",                CSeq_id::eAcc_embl_con },
    { "embl_ddbj",               CSeq_id::eAcc_embl_ddbj },
    { "embl_dirsub",             CSeq_id::eAcc_embl_dirsub },
    { "embl_est",                CSeq_id::eAcc_embl_est },
    { "embl_genome",             CSeq_id::eAcc_embl_genome },
    { "embl_gss",                CSeq_id::eAcc_embl_gss },
    { "embl_htgs",               CSeq_id::eAcc_embl_htgs },
    { "embl_mga",                CSeq_id::eAcc_embl_mga },
    { "embl_other",              CSeq_id::eAcc_embl_other },
    { "embl_other_nuc",          CSeq_id::eAcc_embl_other_nuc },
    { "embl_patent",             CSeq_id::eAcc_embl_patent },
    { "embl_prot",               CSeq_id::eAcc_embl_prot },
    { "embl_tpa_nuc",            CSeq_id::eAcc_embl_tpa_nuc },
    { "embl_tpa_other",          CSeq_id::eAcc_embl_tpa_other },
    { "embl_tpa_prot",           CSeq_id::eAcc_embl_tpa_prot },
    { "embl_tpa_tsa_nuc",        CSeq_id::eAcc_embl_tpa_tsa_nuc },
    { "embl_tpa_tsa_prot",       CSeq_id::eAcc_embl_tpa_tsa_prot },
    { "embl_tpa_tsam_nuc",       CSeq_id::eAcc_embl_tpa_tsam_nuc },
    { "embl_tpa_tsam_prot",      CSeq_id::eAcc_embl_tpa_tsam_prot },
    { "embl_tpa_tsav_nuc",       CSeq_id::eAcc_embl_tpa_tsav_nuc },
    { "embl_tpa_tsav_prot",      CSeq_id::eAcc_embl_tpa_tsav_prot },
    { "embl_tpa_tsavm_nuc",      CSeq_id::eAcc_embl_tpa_tsavm_nuc },
    { "embl_tpa_tsavm_prot",     CSeq_id::eAcc_embl_tpa_tsavm_prot },
    { "embl_tpa_wgs_nuc",        CSeq_id::eAcc_embl_tpa_wgs_nuc },
    { "embl_tpa_wgs_prot",       CSeq_id::eAcc_embl_tpa_wgs_prot },
    { "embl_tpa_wgsm_nuc",       CSeq_id::eAcc_embl_tpa_wgsm_nuc },
    { "embl_tpa_wgsm_prot",      CSeq_id::eAcc_embl_tpa_wgsm_prot },
    { "embl_tpa_wgsv_nuc",       CSeq_id::eAcc_embl_tpa_wgsv_nuc },
    { "embl_tpa_wgsv_prot",      CSeq_id::eAcc_embl_tpa_wgsv_prot },
    { "embl_tpa_wgsvm_nuc",      CSeq_id::eAcc_embl_tpa_wgsvm_nuc },
    { "embl_tpa_wgsvm_prot",     CSeq_id::eAcc_embl_tpa_wgsvm_prot },
    { "embl_tsa_nuc",            CSeq_id::eAcc_embl_tsa_nuc },
    { "embl_tsa_prot",           CSeq_id::eAcc_embl_tsa_prot },
    { "embl_tsam_nuc",           CSeq_id::eAcc_embl_tsam_nuc },
    { "embl_tsam_prot",          CSeq_id::eAcc_embl_tsam_prot },
    { "embl_tsav_nuc",           CSeq_id::eAcc_embl_tsav_nuc },
    { "embl_tsav_prot",          CSeq_id::eAcc_embl_tsav_prot },
    { "embl_tsavm_nuc",          CSeq_id::eAcc_embl_tsavm_nuc },
    { "embl_tsavm_prot",         CSeq_id::eAcc_embl_tsavm_prot },
    { "embl_wgs_nuc",            CSeq_id::eAcc_embl_wgs_nuc },
    { "embl_wgs_prot",           CSeq_id::eAcc_embl_wgs_prot },
    { "embl_wgsm_nuc",           CSeq_id::eAcc_embl_wgsm_nuc },
    { "embl_wgsm_prot",          CSeq_id::eAcc_embl_wgsm_prot },
    { "embl_wgsv_nuc",           CSeq_id::eAcc_embl_wgsv_nuc },
    { "embl_wgsv_prot",          CSeq_id::eAcc_embl_wgsv_prot },
    { "embl_wgsvm_nuc",          CSeq_id::eAcc_embl_wgsvm_nuc },
    { "embl_wgsvm_prot",         CSeq_id::eAcc_embl_wgsvm_prot },
    { "gb_backbone",             CSeq_id::eAcc_gb_backbone },
    { "gb_cdna",                 CSeq_id::eAcc_gb_cdna },
    { "gb_chromosome",           CSeq_id::eAcc_gb_chromosome },
    { "gb_con",                  CSeq_id::eAcc_gb_con },
    { "gb_ddbj",                 CSeq_id::eAcc_gb_ddbj },
    { "gb_dirsub",               CSeq_id::eAcc_gb_dirsub },
    { "gb_embl",                 CSeq_id::eAcc_gb_embl },
    { "gb_embl_ddbj",            CSeq_id::eAcc_gb_embl_ddbj },
    { "gb_est",                  CSeq_id::eAcc_gb_est },
    { "gb_genome",               CSeq_id::eAcc_gb_genome },
    { "gb_gsdb",                 CSeq_id::eAcc_gb_gsdb },
    { "gb_gss",                  CSeq_id::eAcc_gb_gss },
    { "gb_htgs",                 CSeq_id::eAcc_gb_htgs },
    { "gb_mga",                  CSeq_id::eAcc_gb_mga },
    { "gb_optical_map",          CSeq_id::eAcc_gb_optical_map },
    { "gb_other",                CSeq_id::eAcc_gb_other },
    { "gb_other_nuc",            CSeq_id::eAcc_gb_other_nuc },
    { "gb_patent",               CSeq_id::eAcc_gb_patent },
    { "gb_patent_prot",          CSeq_id::eAcc_gb_patent_prot },
    { "gb_prot",                 CSeq_id::eAcc_gb_prot },
    { "gb_segset",               CSeq_id::eAcc_gb_segset },
    { "gb_sts",                  CSeq_id::eAcc_gb_sts },
    { "gb_targeted_nuc",         CSeq_id::eAcc_gb_targeted_nuc },
    { "gb_tpa_chromosome",       CSeq_id::eAcc_gb_tpa_chromosome },
    { "gb_tpa_con",              CSeq_id::eAcc_gb_tpa_con },
    { "gb_tpa_nuc",              CSeq_id::eAcc_gb_tpa_nuc },
    { "gb_tpa_other",            CSeq_id::eAcc_gb_tpa_other },
    { "gb_tpa_prot",             CSeq_id::eAcc_gb_tpa_prot },
    { "gb_tpa_segset",           CSeq_id::eAcc_gb_tpa_segset },
    { "gb_tpa_wgs_nuc",          CSeq_id::eAcc_gb_tpa_wgs_nuc },
    { "gb_tpa_wgs_prot",         CSeq_id::eAcc_gb_tpa_wgs_prot },
    { "gb_tpa_wgsm_nuc",         CSeq_id::eAcc_gb_tpa_wgsm_nuc },
    { "gb_tpa_wgsm_prot",        CSeq_id::eAcc_gb_tpa_wgsm_prot },
    { "gb_tpa_wgsv_nuc",         CSeq_id::eAcc_gb_tpa_wgsv_nuc },
    { "gb_tpa_wgsv_prot",        CSeq_id::eAcc_gb_tpa_wgsv_prot },
    { "gb_tpa_wgsvm_nuc",        CSeq_id::eAcc_gb_tpa_wgsvm_nuc },
    { "gb_tpa_wgsvm_prot",       CSeq_id::eAcc_gb_tpa_wgsvm_prot },
    { "gb_tsa_nuc",              CSeq_id::eAcc_gb_tsa_nuc },
    { "gb_tsa_prot",             CSeq_id::eAcc_gb_tsa_prot },
    { "gb_tsam_nuc",             CSeq_id::eAcc_gb_tsam_nuc },
    { "gb_tsam_prot",            CSeq_id::eAcc_gb_tsam_prot },
    { "gb_tsav_nuc",             CSeq_id::eAcc_gb_tsav_nuc },
    { "gb_tsav_prot",            CSeq_id::eAcc_gb_tsav_prot },
    { "gb_tsavm_nuc",            CSeq_id::eAcc_gb_tsavm_nuc },
    { "gb_tsavm_prot",           CSeq_id::eAcc_gb_tsavm_prot },
    { "gb_wgs_nuc",              CSeq_id::eAcc_gb_wgs_nuc },
    { "gb_wgs_prot",             CSeq_id::eAcc_gb_wgs_prot },
    { "gb_wgsm_nuc",             CSeq_id::eAcc_gb_wgsm_nuc },
    { "gb_wgsm_prot",            CSeq_id::eAcc_gb_wgsm_prot },
    { "gb_wgsv_nuc",             CSeq_id::eAcc_gb_wgsv_nuc },
    { "gb_wgsv_prot",            CSeq_id::eAcc_gb_wgsv_prot },
    { "gb_wgsvm_nuc",            CSeq_id::eAcc_gb_wgsvm_nuc },
    { "gb_wgsvm_prot",           CSeq_id::eAcc_gb_wgsvm_prot },
    { "general",                 CSeq_id::eAcc_general },
    { "general_nuc",             CSeq_id::eAcc_general_nuc },
    { "general_prot",            CSeq_id::eAcc_general_prot },
    { "gi",                      CSeq_id::eAcc_gi },
    { "gibbmt",                  CSeq_id::eAcc_gibbmt },
    { "gibbsq",                  CSeq_id::eAcc_gibbsq },
    { "giim",                    CSeq_id::eAcc_giim },
    { "gpipe_chromosome",        CSeq_id::eAcc_gpipe_chromosome },
    { "gpipe_genomic",           CSeq_id::eAcc_gpipe_genomic },
    { "gpipe_mrna",              CSeq_id::eAcc_gpipe_mrna },
    { "gpipe_ncrna",             CSeq_id::eAcc_gpipe_ncrna },
    { "gpipe_other_nuc",         CSeq_id::eAcc_gpipe_other_nuc },
    { "gpipe_prot",              CSeq_id::eAcc_gpipe_prot },
    { "gpipe_scaffold",          CSeq_id::eAcc_gpipe_scaffold },
    { "gpipe_unreserved",        CSeq_id::eAcc_gpipe_unreserved },
    { "gsdb_dirsub",             CSeq_id::eAcc_gsdb_dirsub },
    { "local",                   CSeq_id::eAcc_local },
    { "maybe_ddbj",              CSeq_id::eAcc_maybe_ddbj },
    { "maybe_embl",              CSeq_id::eAcc_maybe_embl },
    { "maybe_gb",                CSeq_id::eAcc_maybe_gb },
    { "named_annot_track",       CSeq_id::eAcc_named_annot_track },
    { "patent",                  CSeq_id::eAcc_patent },
    { "pdb",                     CSeq_id::eAcc_pdb },
    { "pir",                     CSeq_id::eAcc_pir },
    { "prf",                     CSeq_id::eAcc_prf },
    { "refseq_chromosome",       CSeq_id::eAcc_refseq_chromosome },
    { "refseq_chromosome_ncbo",  CSeq_id::eAcc_refseq_chromosome_ncbo },
    { "refseq_contig",           CSeq_id::eAcc_refseq_contig },
    { "refseq_contig_ncbo",      CSeq_id::eAcc_refseq_contig_ncbo },
    { "refseq_genome",           CSeq_id::eAcc_refseq_genome },
    { "refseq_genomic",          CSeq_id::eAcc_refseq_genomic },
    { "refseq_mrna",             CSeq_id::eAcc_refseq_mrna },
    { "refseq_mrna_predicted",   CSeq_id::eAcc_refseq_mrna_predicted },
    { "refseq_ncrna",            CSeq_id::eAcc_refseq_ncrna },
    { "refseq_ncrna_predicted",  CSeq_id::eAcc_refseq_ncrna_predicted },
    { "refseq_prot",             CSeq_id::eAcc_refseq_prot },
    { "refseq_prot_predicted",   CSeq_id::eAcc_refseq_prot_predicted },
    { "refseq_unique_prot",      CSeq_id::eAcc_refseq_unique_prot },
    { "refseq_unreserved",       CSeq_id::eAcc_refseq_unreserved },
    { "refseq_wgs_intermed",     CSeq_id::eAcc_refseq_wgs_intermed },
    { "refseq_wgs_nuc",          CSeq_id::eAcc_refseq_wgs_nuc },
    { "refseq_wgs_prot",         CSeq_id::eAcc_refseq_wgs_prot },
    { "refseq_wgsm_intermed",    CSeq_id::eAcc_refseq_wgsm_intermed },
    { "refseq_wgsm_nuc",         CSeq_id::eAcc_refseq_wgsm_nuc },
    { "refseq_wgsm_prot",        CSeq_id::eAcc_refseq_wgsm_prot },
    { "refseq_wgsv_intermed",    CSeq_id::eAcc_refseq_wgsv_intermed },
    { "refseq_wgsv_nuc",         CSeq_id::eAcc_refseq_wgsv_nuc },
    { "refseq_wgsv_prot",        CSeq_id::eAcc_refseq_wgsv_prot },
    { "refseq_wgsvm_intermed",   CSeq_id::eAcc_refseq_wgsvm_intermed },
    { "refseq_wgsvm_nuc",        CSeq_id::eAcc_refseq_wgsvm_nuc },
    { "refseq_wgsvm_prot",       CSeq_id::eAcc_refseq_wgsvm_prot },
    { "swissprot",               CSeq_id::eAcc_swissprot },
    { "unknown",                 CSeq_id::eAcc_unknown },
    { "unreserved_nuc",          CSeq_id::eAcc_unreserved_nuc },
    { "unreserved_prot",         CSeq_id::eAcc_unreserved_prot }
};
// Static map type over the table above, keyed by CTempString and compared
// with PNocase_Generic<CTempString>.
typedef CStaticPairArrayMap<CTempString, CSeq_id::EAccessionInfo,
                            PNocase_Generic<CTempString> > TAccInfoMap;
// Builds sc_AccInfoMap from sc_AccInfoArray.
DEFINE_STATIC_ARRAY_MAP_WITH_COPY(TAccInfoMap, sc_AccInfoMap, sc_AccInfoArray);

// The ten decimal digit characters, in ascending order.
// NOTE(review): the users of this constant lie outside the visible part of
// the file; presumably it serves digit searches in accession parsing.
static const char kDigits[] = "0123456789";
// Maximum number of varying final digits for which it's practical to
// use a bit vector; this BV-based representation additionally
// requires a constant alphabetical prefix.  (In other situations,
// this code sticks to a traditional representation that has no such
// limits but doesn't scale as well to large numbers of special cases.)
static const unsigned int kMaxSmallSpecialDigits = 9;
// kBVSizes[n] == 10^n for n = 0 .. kMaxSmallSpecialDigits, i.e. the number
// of distinct values that n decimal digits can express.
static const bm::bvector<>::size_type kBVSizes[kMaxSmallSpecialDigits + 1] = {
    1,
    10,
    100,
    1000,
    10000,
    100000,
    1000000,
    10000000,
    100000000,
    1000000000
};

// In-memory form of an accession guide: a set of rules mapping accession
// formats and prefixes (plus "special" individual accessions or ranges)
// to CSeq_id::EAccessionInfo values.
struct SAccGuide : public CObject
{
    typedef CSeq_id::EAccessionInfo TAccInfo;
    typedef map<string, TAccInfo>   TPrefixes;
    typedef pair<string, TAccInfo>  TPair;
    typedef list<TPair>             TPairs; // not vector -- need stable ptrs
    typedef map<string, TPair>      TBigSpecialMap; // last -> first -> value
    // Bit vector of covered numeric suffixes plus the type they map to.
    typedef pair<bm::bvector<>, TAccInfo> TSmallSpecialOption;
    // Keyed by the constant (non-varying) leading portion of the accession.
    typedef multimap<string, TSmallSpecialOption> TSmallSpecialMap;
    // Packed (letters, digits) pair; see s_Key.
    typedef unsigned int            TFormatCode;
    typedef pair<string, string>    TFallback; // fallback, refinement
    // Keyed by the address of the stored value the fallback applies to.
    typedef map<const TAccInfo*, TFallback> TFallbackMap;

    // All rules sharing a single format code.
    struct SSubMap {
        TPrefixes         prefixes;       // exact prefixes
        TPairs            wildcards;      // prefixes containing '?' or '*'
        TBigSpecialMap    big_specials;   // ranges kept as (last, first)
        TSmallSpecialMap  small_specials; // ranges kept as bit vectors
    };
    typedef map<TFormatCode, SSubMap> TMainMap;

    // Parsing state carried from one AddRule call to the next, mostly
    // caches of the most recent lookups.
    struct SHints {
        SHints()
            : prev_type(CSeq_id::eAcc_unknown), prev_submap(NULL),
              prev_special_format(0),
              prev_special_type(CSeq_id::eAcc_unknown),
              prev_special_base_type(CSeq_id::eAcc_unknown),
              special2_submap(nullptr),
              // Give special2_type a defined value up front rather than
              // relying on every reader running after a "special2" line.
              special2_type(CSeq_id::eAcc_unknown),
              version(1)
            {}

        // Look up an accession type by name; kUnrecognized if unknown.
        TAccInfo FindAccInfo(CTempString name);
        // Determine the type currently governing a special accession
        // or range.
        TAccInfo FindSpecial(const SAccGuide& guide, TFormatCode fmt,
                             CTempString acc_or_range);
        // Find (creating if necessary) the submap for a format code.
        SSubMap& FindSubMap(TMainMap& rules, TFormatCode fmt);

        TAccInfo              prev_type;
        CTempString           prev_type_name;
        TMainMap::value_type* prev_submap;
        TBigSpecialMap::iterator   prev_big_special;
        TSmallSpecialMap::iterator prev_small_special;
        TFormatCode           prev_special_format;
        string                prev_special_key;
        CTempString           prev_special_type_name;
        string                prev_special_base_key;
        TAccInfo              prev_special_type;
        TAccInfo              prev_special_base_type;
        string                prev_special2_acc;
        map<string, CTempString> default_fallbacks;
        string                special2_name;
        unique_ptr<string>    special2_old_name;
        SSubMap*              special2_submap;
        TAccInfo              special2_type;
        unsigned int          version;
    };

    // Default setup; defined out of line.
    SAccGuide(void);
    // Load rules from the named file.  Like the default constructor,
    // finish by installing the default "gnl" database tags if the guide
    // supplied none.
    SAccGuide(const string& filename)
        : count(0)
        { x_Load(filename); x_InitGeneral(); }
    // Load rules from an already open line reader; see above.
    SAccGuide(ILineReader& lr)
        : count(0)
        { x_Load(lr); x_InitGeneral(); }

    // Parse a single guide line and record the rule it describes.
    void AddRule(const CTempString& rule, SHints& hints);
    // Look up the type for an accession or bare prefix in format fmt.
    // If key_used is non-NULL it may receive the rule key that matched.
    const TAccInfo& Find(TFormatCode fmt, const CTempString& acc_or_pfx,
                         string* key_used = NULL) const;
    // Pack a letter count (high 16 bits) and digit count (low bits)
    // into a single format code.
    static TFormatCode s_Key(unsigned short letters, unsigned short digits)
        { return TFormatCode(letters) << 16 | digits; }

    unsigned int count;     // number of lines passed to AddRule so far
    TMainMap     rules;     // per-format rules
    TPrefixes    general;   // "gnl" database tag -> type
    TFallbackMap fallbacks; // details for values flagged fAcc_fallback

private:
    void x_Load(const string& filename);
    void x_Load(ILineReader& lr);
    void x_InitGeneral(void);
    void x_AddSpecial(SSubMap& submap, SHints& hints, TFormatCode fmt,
                      CTempString from, CTempString to, TAccInfo value,
                      const string* old_name, const CTempString& new_name);
    static bm::bvector_size_type x_SplitSpecial(CTempString& acc,
                                                TFormatCode fmt);
};

// Sentinel (all bits set) returned by SHints::FindAccInfo for a type name
// that is absent from sc_AccInfoMap; distinct from CSeq_id::eAcc_unknown.
static const SAccGuide::TAccInfo kUnrecognized
    = static_cast<SAccGuide::TAccInfo>(-1);

// Translate an accession type name into its TAccInfo value, returning
// kUnrecognized for names missing from sc_AccInfoMap.  The most recent
// successful lookup is cached in prev_type_name / prev_type.
inline
SAccGuide::TAccInfo SAccGuide::SHints::FindAccInfo(CTempString name)
{
    // Fast path: the name matches the cached one.
    if (name == prev_type_name) {
        return prev_type;
    }

    TAccInfoMap::const_iterator match = sc_AccInfoMap.find(name);
    if (match == sc_AccInfoMap.end()) {
        return kUnrecognized; // cache deliberately left untouched
    }

    // A different type is now current, so the remembered special key
    // no longer corresponds to prev_type.
    prev_special_key.clear();
    prev_type_name = match->first;
    prev_type      = match->second;
    return prev_type;
}

// Report the type currently associated with a special accession or range
// in format fmt, consulting cached results from the previous call before
// falling back to a lookup of the bare prefix in the guide.
inline
SAccGuide::TAccInfo SAccGuide::SHints::FindSpecial(const SAccGuide& guide,
                                                   TFormatCode fmt,
                                                   CTempString acc_or_range)
{
    // The high 16 bits of the format code give the prefix length.
    CTempString pfx(acc_or_range, 0, fmt >> 16);
    const bool same_format = (fmt == prev_special_format);

    if (same_format  &&  acc_or_range == prev_special_key) {
        // Exactly the special that was registered last time.
        prev_special_type      = prev_type;
        prev_special_type_name = prev_type_name;
        return prev_type;
    }
    if (same_format  &&  pfx == prev_special_base_key) {
        // Same prefix as before; reuse its base type.
        return prev_special_base_type;
    }

    // Cache miss: look the prefix up and remember the outcome.
    prev_special_format    = fmt;
    prev_special_base_key  = pfx;
    prev_special_base_type = guide.Find(fmt, pfx);
    return prev_special_base_type;
}

// Return the submap for format fmt, inserting an empty one if the guide
// has none yet.  The result is cached in prev_submap; whenever the cached
// submap changes, the special-case insertion hints are reset to the end
// of the new submap's containers.
inline
SAccGuide::SSubMap& SAccGuide::SHints::FindSubMap(SAccGuide::TMainMap& rules,
                                                  SAccGuide::TFormatCode fmt)
{
    if (prev_submap == NULL  ||  prev_submap->first != fmt) {
        SAccGuide::TMainMap::iterator slot = rules.lower_bound(fmt);
        const bool missing = (slot == rules.end()  ||  slot->first != fmt);
        if (missing) {
            // lower_bound doubles as the insertion hint.
            slot = rules.insert(slot, make_pair(fmt, SAccGuide::SSubMap()));
        }
        prev_submap        = &*slot;
        prev_big_special   = slot->second.big_specials.end();
        prev_small_special = slot->second.small_specials.end();
    }
    return prev_submap->second;
}

// Parse one line of an accession guide and record the rule it describes.
//
// After stripping any '#' comment and splitting on blanks/tabs, the
// following line forms are recognized:
//   version N               -- guide format version (1 or 2)
//   L+D prefix type [*]     -- main rule for accessions with L leading
//                              characters and D digits; a trailing "*"
//                              marks the entry as having specials
//   special acc[-acc] type  -- special case for one accession or a range
//   special2 PFX+D type...  -- header for compact special ranges
//   : range...              -- ranges continuing the last special2 header
//   gnl DB type             -- type for a general ("gnl") database tag
//   fallback new known      -- default fallback for an unrecognized type
// Anything else is reported (error subcode 5) and ignored.
//
// rule:  the raw line.
// hints: parsing state shared across consecutive calls; updated here.
void SAccGuide::AddRule(const CTempString& rule, SHints& hints)
{
    CTempString         tmp1, tmp2;
    vector<CTempStringEx> tokens;
    SIZE_TYPE           pos, pos2;

    ++count; // doubles as the line number in diagnostics
    tmp1.assign(rule, 0, rule.find('#')); // strip comment
    if (tmp1.empty())
        return;
    tokens.reserve(3);
    NStr::Split(tmp1, " \t", tokens,
                NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
    if (tokens.empty()) {
        return;
    } else if (tokens.size() == 2
               &&  NStr::EqualNocase(tokens[0], "version")) {
        hints.special2_submap = nullptr;
        hints.version = NStr::StringToUInt(tokens[1], NStr::fConvErr_NoThrow);
        if (hints.version > 2  ||  hints.version < 1) {
            ERR_POST_X(2, "SAccGuide::AddRule: " << count
                          << ": Unsupported version " << tokens[1]);
            return;
        }
    } else if ((pos = tokens[0].find('+')) != NPOS
               &&  (tokens.size() == 3
                    ||  (tokens.size() == 4  &&  tokens[3] == "*"))) {
        // Main rule: "L+D prefix type [*]"
        hints.special2_submap = nullptr;
        // _VERIFY(NStr::SplitInTwo(tokens[0], "+", tmp1, tmp2));
        tmp1.assign(tokens[0], 0, pos);
        tmp2.assign(tokens[0], pos + 1, NPOS);
        TFormatCode fmt
            = s_Key(NStr::StringToUInt(tmp1, NStr::fConvErr_NoThrow),
                    NStr::StringToUInt(tmp2, NStr::fConvErr_NoThrow));
        TAccInfo value = hints.FindAccInfo(tokens[2]);
        unique_ptr<string> old_name; // set only when falling back
        if (value == kUnrecognized) {
            // Unknown type name: keep whatever already applies to this
            // prefix (flagged as a fallback), or else use a registered
            // default fallback for the name.
            string   key_used;
            TAccInfo old = Find(fmt, tokens[1], &key_used);
            old_name.reset(new string);
            if (old != CSeq_id::eAcc_unknown) {
                value = TAccInfo(old | CSeq_id::fAcc_fallback);
                if (old == hints.prev_type) {
                    *old_name = hints.prev_type_name;
                } else {
                    *old_name = "0x" + NStr::UIntToString(old, 0, 16);
                }
                if ( !key_used.empty() ) {
                    key_used = " (per " + key_used + ')';
                }
                ERR_POST_X(8, Info << "SAccGuide::AddRule: " << count
                           << ": ignoring refinement of " << tokens[1]
                           << " from " << *old_name << key_used
                           << " to unrecognized accession type " << tokens[2]);
            } else {
                auto it = hints.default_fallbacks.find(tokens[2]);
                if (it != hints.default_fallbacks.end()) {
                    *old_name = it->second;
                    value = TAccInfo(hints.FindAccInfo(*old_name)
                                     | CSeq_id::fAcc_fallback);
                    ERR_POST_X(17,
                               Info << "SAccGuide::AddRule: " << count
                               << ": using default fallback from " << tokens[2]
                               << " to " << *old_name << " for " << tokens[1]);
                } else {
                    // Nothing to fall back on; value stays kUnrecognized
                    // and the rule is dropped below.
                    *old_name = "unknown";
                    ERR_POST_X(3,
                               "SAccGuide::AddRule: " << count
                               << ": unrecognized accession type " << tokens[2]
                               << " for " << tokens[1]);
                }
            }
        }
        if (value != kUnrecognized) {
            SSubMap& submap = hints.FindSubMap(rules, fmt);
            if (tokens.size() == 4) {
                // Trailing "*": special cases exist for this prefix.
                value = TAccInfo(value | CSeq_id::fAcc_specials);
            }
            const TAccInfo* value_ptr = NULL;
            if (tokens[1].find_first_of("?*") == NPOS) {
                value_ptr = &(submap.prefixes[tokens[1]] = value);
            } else {
                // Account for possible refinements of fallback definitions
                NON_CONST_ITERATE (TPairs, wit, submap.wildcards) {
                    if (wit->first == tokens[1]) {
                        wit->second = value;
                        value_ptr   = &wit->second;
                        break;
                    }
                }
                if (value_ptr == NULL) {
                    submap.wildcards.push_back(TPair(tokens[1], value));
                    value_ptr = &submap.wildcards.back().second;
                }
            }
            _ASSERT(*value_ptr == value);
            if ((value & CSeq_id::fAcc_fallback) != 0) {
                // Remember what was requested, keyed by the stored
                // value's address.
                _ASSERT(old_name.get() != NULL  &&  !old_name->empty());
                fallbacks[value_ptr] = make_pair(*old_name, tokens[2]);
            } else {
                _ASSERT(old_name.get() == NULL);
            }
        }
    } else if (tokens.size() == 3 && NStr::EqualNocase(tokens[0], "special")) {
        // Special case: "special acc[-acc] type"
        hints.special2_submap = nullptr;
        pos  = tokens[1].find_first_of(kDigits);
        pos2 = tokens[1].find('-', pos);
        // Format is derived from the first accession: characters before
        // the first digit, then everything up to the '-' (or the end).
        TFormatCode fmt
            = s_Key(pos, ((pos2 == NPOS) ? tokens[1].size() : pos2) - pos);
        TAccInfo old   = hints.FindSpecial(*this, fmt, tokens[1]);
        TAccInfo value = hints.FindAccInfo(tokens[2]);
        if ((old & CSeq_id::fAcc_specials) != 0) {
            old = TAccInfo(old & ~CSeq_id::fAcc_specials);
        } else {
            string key_used;
            Find(fmt, tokens[1].substr(0, pos2), &key_used);
            if ( !key_used.empty() ) {
                ERR_POST_X(13, Warning
                           << "SAccGuide::AddRule: Main listing for special "
                           << tokens[1]
                           << " doesn't indicate that specials are present.");
            }
        }
        unique_ptr<string> old_name; // set only when falling back
        if (value == kUnrecognized) {
            string   key_used;
            Find(fmt, tokens[1].substr(0, pos2), &key_used);
            old_name.reset(new string);
            if ( !key_used.empty() ) {
                auto it = hints.default_fallbacks.find(tokens[2]);
                if (it != hints.default_fallbacks.end()) {
                    // A default fallback takes precedence over the
                    // type inherited from the main listing.
                    old = CSeq_id::eAcc_unknown;
                    *old_name = it->second;
                    value = TAccInfo(hints.FindAccInfo(*old_name)
                                     | CSeq_id::fAcc_fallback);
                    ERR_POST_X(17,
                               Info << "SAccGuide::AddRule: " << count
                               << ": using default fallback from " << tokens[2]
                               << " to " << *old_name << " for " << tokens[1]);
                }
            }
            if (old) {
                value = TAccInfo(old | CSeq_id::fAcc_fallback);
                if (old == hints.prev_type) {
                    *old_name = hints.prev_type_name;
                } else if (old == hints.prev_special_type) {
                    *old_name = hints.prev_special_type_name;
                } else {
                    *old_name = "0x" + NStr::UIntToString(old, 0, 16);
                }
                if ( !key_used.empty() ) {
                    key_used = " (per " + key_used + ')';
                }
                ERR_POST_X(4, Info << "SAccGuide::AddRule: " << count
                           << ": unrecognized accession type " << tokens[2]
                           << " for special case " << tokens[1]
                           << "; falling back to " << *old_name << key_used);
            } else if (old_name->empty()) {
                // Neither an inherited type nor a default fallback.
                *old_name = "unknown";
                ERR_POST_X(9, Warning << "SAccGuide::AddRule: " << count
                           << ": unrecognized accession type " << tokens[2]
                           << " for stray(!) special case " << tokens[1]);
            }
        } else {
            _ASSERT(hints.prev_type == value);
            hints.prev_special_key = tokens[1];
        }
        if (value != kUnrecognized) {
            SSubMap& submap = hints.FindSubMap(rules, fmt);
            if (pos2 == NPOS) {
                // Single accession: a range of one.
                tmp1 = tmp2 = tokens[1];
            } else {
                tmp1.assign(tokens[1], 0, pos2);
                tmp2.assign(tokens[1], pos2 + 1, NPOS);
            }
            x_AddSpecial(submap, hints, fmt, tmp1, tmp2, value, old_name.get(),
                         tokens[2]);
        }
        hints.prev_special_type_name.clear();
        hints.prev_special_type = CSeq_id::eAcc_unknown;
    } else if (tokens.size() >= 3
               &&  NStr::EqualNocase(tokens[0], "special2")) {
        // Header for compact specials: "special2 PFX+D type..."; the
        // ranges themselves follow on ":" continuation lines.
        if (hints.version < 2) {
            ERR_POST_X(18,
                       Warning << "SAccGuide::AddRule: " << count
                       << ": special2 valid only in version 2+ guides");
        }
        NStr::SplitInTwo(tokens[1], "+", tmp1, tmp2);
        auto digits = NStr::StringToNumeric<unsigned short>(tmp2);
        hints.prev_special_format = s_Key(tmp1.size(), digits);
        hints.special2_name = tokens[2];
        hints.special2_old_name.reset();
        hints.special2_submap
            = &hints.FindSubMap(rules, hints.prev_special_format);
        // Template accession: the prefix followed by all-zero digits.
        // Continuation lines overwrite its tail in place.
        hints.prev_special2_acc = tmp1 + string(digits, '0');
        TAccInfo old = hints.FindSpecial(*this, hints.prev_special_format,
                                         hints.prev_special2_acc);
        string why;
        if ((old & CSeq_id::fAcc_specials) != 0) {
            old = TAccInfo(old & ~CSeq_id::fAcc_specials);
        } else {
            Find(hints.prev_special_format, hints.prev_special2_acc, &why);
            if ( !why.empty() ) {
                ERR_POST_X(13, Warning
                           << "SAccGuide::AddRule: Main listing for special "
                           << tokens[1]
                           << " doesn't indicate that specials are present.");
            }
        }
        // Use the first type name this build recognizes; names after
        // the first act as explicit fallbacks.
        for (size_t i = 2;  i < tokens.size();  ++i) {
            hints.special2_type = hints.FindAccInfo(tokens[i]);
            if (hints.special2_type != kUnrecognized) {
                if (i > 2) {
                    hints.special2_old_name.reset(new string(tokens[i]));
                }
                break;
            }
        }
        if (hints.special2_type == kUnrecognized) {
            // None recognized; try registered default fallbacks next.
            for (size_t i = 2;  i < tokens.size();  ++i) {
                auto it = hints.default_fallbacks.find(tokens[i]);
                if (it != hints.default_fallbacks.end()) {
                    hints.special2_type = hints.FindAccInfo(it->second);
                    _ASSERT(hints.special2_type != kUnrecognized);
                    hints.special2_old_name.reset(new string(it->second));
                    why = " (per default fallback for " + tokens[i] + ')';
                    break;
                }
            }
            if (hints.special2_type == kUnrecognized) {
                hints.special2_old_name.reset(new string);
                // NOTE(review): old comes from FindSpecial(), which as far
                // as this file shows yields eAcc_unknown (never
                // kUnrecognized) when nothing matches, so the else branch
                // below looks unreachable; the plain "special" handling
                // tests old against zero instead.  Confirm the intent
                // before changing this.
                if (old != kUnrecognized) {
                    hints.special2_type = old;
                    if (old == hints.prev_type) {
                        *hints.special2_old_name = hints.prev_type_name;
                    } else {
                        *hints.special2_old_name
                            = "0x" + NStr::UIntToString(old, 0, 16);
                    }
                    if ( !why.empty() ) {
                        why = " (per " + why + ')';
                    }
                } else {
                    *hints.special2_old_name = "unknown";
                    ERR_POST_X(9, Warning << "SAccGuide::AddRule: " << count
                               << ": unrecognized accession type " << tokens[2]
                               << " for stray(!) special case " << tokens[1]);
                }
            }
        }
        if (hints.special2_old_name.get() != nullptr) {
            hints.special2_type
                = TAccInfo(hints.special2_type | CSeq_id::fAcc_fallback);
            ERR_POST_X(4,
                       Info << "SAccGuide::AddRule: " << count
                       << ": unrecognized accession type " << tokens[2]
                       << " for special case " << tokens[1]
                       << "; falling back to " << *hints.special2_old_name
                       << why);
        }
        hints.prev_special_type_name.clear();
        hints.prev_special_type = CSeq_id::eAcc_unknown;
    } else if (tokens.size() >= 2  &&  tokens[0] == ":") {
        // Continuation of a special2 header: ": range..."
        if (hints.version < 2) {
            ERR_POST_X(19,
                       Warning << "SAccGuide::AddRule: " << count
                       << ": special2 continuation lines valid only in"
                       " version 2+ guides");
        }
        if (hints.special2_submap == nullptr) {
            ERR_POST_X(20,
                       Warning <<
                       "SAccGuide::AddRule: " << count
                       << ": ignoring misplaced special2 ranges line.");
            return;
        }
        const SIZE_TYPE acc_size = hints.prev_special2_acc.size();
        string s;
        CTempString from;
        // Each endpoint is copied over the tail of the template
        // accession, ending at its last character.
        char *p = &hints.prev_special2_acc[acc_size];
        for (size_t i = 1;  i < tokens.size();  ++i) {
            NStr::SplitInTwo(tokens[i], "-", tmp1, tmp2);
            if (tmp1.size() > acc_size  ||  tmp2.size() > acc_size) {
                // An endpoint longer than the template would make the
                // memcpy calls below write before the start of the
                // string's buffer; skip such malformed input.
                ERR_POST_X(5, Warning << "SAccGuide::AddRule: " << count
                           << ": ignoring oversized special2 range "
                           << tokens[i]);
                continue;
            }
            memcpy(p - tmp1.size(), tmp1.data(), tmp1.size());
            if (tmp2.empty()) {
                // Single value: from and to are the same accession.
                from = hints.prev_special2_acc;
            } else {
                // Keep a copy of the start before overwriting the
                // template with the end of the range.
                s = hints.prev_special2_acc;
                from = s;
                memcpy(p - tmp2.size(), tmp2.data(), tmp2.size());
            }
            x_AddSpecial(*hints.special2_submap, hints,
                         hints.prev_special_format, from,
                         hints.prev_special2_acc, hints.special2_type,
                         hints.special2_old_name.get(), hints.special2_name);
        }
    } else if (tokens.size() == 3 && NStr::EqualNocase(tokens[0], "gnl")) {
        // General database tag: "gnl DB type"; tags are stored uppercase.
        hints.special2_submap = nullptr;
        string key(tokens[1]);
        NStr::ToUpper(key);
        TAccInfo value = hints.FindAccInfo(tokens[2]);
        if (value == kUnrecognized) {
            TPrefixes::iterator it2 = general.find(key);
            if (it2 == general.end()) {
                ERR_POST_X(3, "SAccGuide::AddRule: " << count
                           << ": unrecognized accession type " << tokens[2]
                           << " for " << key);
            } else {
                // Keep the existing entry, flagged as a fallback.
                string old_name;
                if (it2->second == hints.prev_type) {
                    old_name = hints.prev_type_name;
                } else {
                    old_name = "0x" + NStr::UIntToString(it2->second, 0, 16);
                }
                it2->second = TAccInfo(it2->second | CSeq_id::fAcc_fallback);
                fallbacks[&it2->second] = make_pair(old_name, tokens[2]);
                ERR_POST_X(8, Info << "SAccGuide::AddRule: " << count
                           << ": ignoring refinement of " << key << " from "
                           << old_name << " to unrecognized accession type "
                           << tokens[2]);
            }
        } else {
            general[key] = value;
        }
    } else if (tokens.size() == 3
               &&  NStr::EqualNocase(tokens[0], "fallback")) {
        // Default fallback: "fallback new_type known_type"
        hints.special2_submap = nullptr;
        if (hints.version < 2) {
            ERR_POST_X(21,
                       Warning << "SAccGuide::AddRule: " << count
                       << ": default fallbacks valid only in version 2+"
                       " guides");
        }
        // Only record fallbacks whose target is itself recognized.
        if (hints.FindAccInfo(tokens[2]) != kUnrecognized) {
            hints.default_fallbacks[tokens[1]] = tokens[2];
        }
    } else {
        ERR_POST_X(5, Warning << "SAccGuide::AddRule: " << count
                      << ": ignoring invalid line: " << rule);
    }
}

// Look up the type for an accession (or bare prefix) in format fmt.
//
// fmt:        format code as produced by s_Key; its high 16 bits give the
//             prefix length.
// acc_or_pfx: full accession, or just its prefix.
// key_used:   optional; may receive the wildcard or prefix that supplied
//             the result, and is cleared when a special case matched.
//
// Returns a reference to the stored value, or to a static eAcc_unknown
// when nothing applies.  Exact prefixes take precedence over wildcards;
// special cases are consulted only for full accessions whose main entry
// carries fAcc_specials.
const SAccGuide::TAccInfo& SAccGuide::Find(TFormatCode fmt,
                                           const CTempString& acc_or_pfx,
                                           string* key_used) const
{
    static const TAccInfo kUnknown = CSeq_id::eAcc_unknown;
    TMainMap::const_iterator it = rules.find(fmt);
    if (it == rules.end()) {
        return kUnknown;
    }

    const SSubMap&            submap = it->second;
    const TAccInfo*           result = &kUnknown;
    CTempString               pfx     (acc_or_pfx, 0, fmt >> 16);
    TPrefixes::const_iterator pit    = submap.prefixes.find(pfx);
    if (pit != submap.prefixes.end()) {
        result = &pit->second;
    } else {
        ITERATE (TPairs, wit, submap.wildcards) {
            if (NStr::MatchesMask(pfx, wit->first)) {
                bool bad_match = false; // Limit ? to matching letters
                SIZE_TYPE pos = wit->first.find('?');
                while (pos != NPOS) {
                    // Go through unsigned char: passing a negative plain
                    // char to the <cctype> classifiers is undefined.
                    if ( !isalpha(static_cast<unsigned char>(pfx[pos]))
                        &&  pfx[pos] != '?' ) {
                        bad_match = true;
                        break;
                    } else {
                        pos = wit->first.find('?', pos + 1);
                    }
                }
                if (bad_match) {
                    continue;
                }
                if (key_used  &&  acc_or_pfx != wit->first) {
                    *key_used = wit->first;
                }
                result = &wit->second;
                break;
            }
        }
    }
    if (acc_or_pfx != pfx  &&  (*result & CSeq_id::fAcc_specials) != 0) {
        // Full accession whose main entry has specials: check the
        // bit-vector specials first, then the range specials.
        pfx = acc_or_pfx;
        auto n = x_SplitSpecial(pfx, fmt);
        for (auto ssit = submap.small_specials.lower_bound(pfx);
             ssit != submap.small_specials.end()  &&  ssit->first == pfx;
             ++ssit) {
            if (ssit->second.first[n]) {
                if (key_used) {
                    key_used->erase();
                }
                return ssit->second.second;
            }
        }
        // big_specials is keyed by the last accession of each range, so
        // lower_bound yields the first range ending at or after the
        // accession; it applies if the range also starts at or before it.
        TBigSpecialMap::const_iterator bsit
            = submap.big_specials.lower_bound(acc_or_pfx);
        if (bsit != submap.big_specials.end()
            &&  !(acc_or_pfx < bsit->second.first) ) {
            if (key_used) {
                key_used->erase();
            }
            return bsit->second.second;
        } else {
            if (key_used  &&  key_used->empty()) {
                *key_used = pfx.substr(0, fmt >> 16);
            }
            return *result;
        }
    } else /* if (*result != CSeq_id::eAcc_unknown) */ {
        return *result;
    }
}


// Default setup: load accguide2.txt if it can be found and is not older
// than the built-in guide, otherwise use the built-in rules; then tidy
// the bit-vector specials and install the default general DB tags.
SAccGuide::SAccGuide(void)
    : count(0)
{
    bool file_is_old = false;
    {{
        string file = g_FindDataFile("accguide2.txt");
        CTime builtin_timestamp(static_cast<time_t>(kBuiltInGuide_Timestamp));
        if ( !file.empty() ) {
            file_is_old = g_IsDataFileOld(file, builtin_timestamp);
            if ( !file_is_old ) {
                try {
                    x_Load(file);
                } STD_CATCH_ALL_X(1, "SAccGuide::SAccGuide")
            }
        }
    }}

    // count is still zero if no file was loaded (or loading read nothing).
    if (count == 0) {
        if (file_is_old) {
            // minor lie: the message names IdentifyAccession
            ERR_POST_X(12, Info << "CSeq_id::IdentifyAccession: "
                       "using built-in rules because accguide.txt is older.");
        } else {
            ERR_POST_X(6, Info << "CSeq_id::IdentifyAccession: "
                       "falling back on built-in rules.");
        }
        SHints hints;
        for (const auto& builtin_rule : kBuiltInGuide) {
            AddRule(builtin_rule, hints);
        }
    }

    // Compact the non-empty bit vectors and drop the empty ones.
    for (auto& format_rules : rules) {
        TSmallSpecialMap& smalls = format_rules.second.small_specials;
        for (TSmallSpecialMap::iterator sit = smalls.begin();
             sit != smalls.end();  ) {
            if (sit->second.first.any()) {
                sit->second.first.optimize();
                ++sit;
            } else {
                sit = smalls.erase(sit);
            }
        }
    }
    x_InitGeneral();
}

// Install the default general ("gnl") database tags, unless the guide
// already defined some of its own.
void SAccGuide::x_InitGeneral(void)
{
    if ( !general.empty() ) {
        return;
    }
    // Hard-coded defaults: only a handful of tags matter, and listing
    // them in accguide.txt right away would make old Toolkit versions
    // emit warnings.
    static const char* const kNucDBs[] = {
        "SRA", "TI", "TR_ASSM_CH", "TRACE_ASSM", "TRACE_CHGR"
    };
    for (const char* db : kNucDBs) {
        general[db] = CSeq_id::eAcc_general_nuc;
    }
}

void SAccGuide::x_Load(const string& filename)
{
    CRef<ILineReader> in(ILineReader::New(filename));
    x_Load(*in);
}

// Feed every line from the reader to AddRule, sharing one SHints across
// the whole input.  At least one line is always read before EOF is
// checked.
void SAccGuide::x_Load(ILineReader& in)
{
    SHints hints;
    for (;;) {
        AddRule(*++in, hints);
        if (in.AtEOF()) {
            break;
        }
    }
}

// Record a special case covering the accessions from 'from' through 'to'
// (which may be equal) in the given submap.
//
// Both endpoints are split by x_SplitSpecial into a leading portion and a
// numeric offset.  If the leading portions differ, the range is stored in
// big_specials keyed by its last accession; otherwise the offsets
// [left, right] are set in a bit vector kept in small_specials under the
// shared leading portion, with one bit vector per distinct type.
//
// submap:   destination rules for format fmt.
// hints:    supplies and receives the insertion-hint iterators.
// value:    type to assign; if it carries fAcc_fallback, the fallback
//           details (*old_name, new_name) are recorded in 'fallbacks'.
// old_name: must be non-null and non-empty exactly when value carries
//           fAcc_fallback (asserted in debug builds).
void SAccGuide::x_AddSpecial(SSubMap& submap, SHints& hints, TFormatCode fmt,
                             CTempString from, CTempString to, TAccInfo value,
                             const string* old_name,
                             const CTempString& new_name)
{
    CTempString from_pfx = from, to_pfx = to;
    // x_SplitSpecial trims its argument down to the leading portion and
    // returns the numeric offset.
    auto left  = x_SplitSpecial(from_pfx, fmt),
         right = x_SplitSpecial(to_pfx, fmt);
    const TAccInfo* value_ptr = nullptr;
    if (from_pfx != to_pfx) {
        // Endpoints differ beyond the bit-vector digits: store as a range.
        hints.prev_big_special
            = submap.big_specials.insert(hints.prev_big_special,
                                         make_pair(to, TPair(from, value)));
        // Account for possible refinement.
        hints.prev_big_special->second.second = value;
        /*
        if (pos2 == NPOS) {
            submap.big_specials[tokens[1]] = TPair(tokens[1], value);
        } else {
            // _VERIFY(NStr::SplitInTwo(tokens[1], "-", from, to));
            from.assign(tokens[1], 0, pos2);
            to.assign(tokens[1], pos2 + 1, NPOS);
            submap.big_specials[to] = TPair(from, value);
        }
        */
        if ((value & CSeq_id::fAcc_fallback) != 0) {
            value_ptr = &hints.prev_big_special->second.second;
        }
    } else {
        TSmallSpecialMap::iterator it = submap.small_specials.end();
        if (hints.prev_small_special != submap.small_specials.end()
            &&  hints.prev_small_special->first == from_pfx) {
            // Same key as the previous insertion: start from there.
            it = hints.prev_small_special;
            // Drop the range from the previously used bit vector; it is
            // set again below in whichever entry ends up selected.
            it->second.first.clear_range(left, right);
            // Walk backwards through entries with this key looking for
            // one of the same type (ignoring the fallback flag); if none
            // is found, resume just past the previous entry so the
            // forward scan below covers the rest.
            while ((it->second.second & ~CSeq_id::fAcc_fallback)
                   != (value & ~CSeq_id::fAcc_fallback)) {
                if (it == submap.small_specials.begin()
                    ||  (--it)->first != from_pfx) {
                    it = hints.prev_small_special;
                    ++it;
                    break;
                }
            }
        } else {
            it = submap.small_specials.lower_bound(from_pfx);
        }
        // Forward scan over entries with this key for a matching type;
        // 'it' becomes end() if there is none.
        while (it != submap.small_specials.end()) {
            if (it->first != from_pfx) {
                it = submap.small_specials.end();
                break;
            } else if ((it->second.second & ~CSeq_id::fAcc_fallback)
                       == (value & ~CSeq_id::fAcc_fallback)) {
                break;
            } else {
                ++it;
            }
        }
        if (it != submap.small_specials.end()) {
            _ASSERT(it->first == from_pfx);
            _ASSERT((it->second.second & ~CSeq_id::fAcc_fallback)
                    == (value & ~CSeq_id::fAcc_fallback));
            hints.prev_small_special = it;
        } else {
            // No bit vector for this key and type yet: create one sized
            // for the format's digit count, capped at the table's limit.
            auto size = kBVSizes[min(fmt & 0xffff, kMaxSmallSpecialDigits)];
            hints.prev_small_special = 
                submap.small_specials.emplace(
                    from_pfx, make_pair(bm::bvector<>(size), value));
        }
        hints.prev_small_special->second.first.set_range(left, right);
        // Account for possible refinement.
        hints.prev_small_special->second.second = value;
        if ((value & CSeq_id::fAcc_fallback) != 0) {
            value_ptr = &hints.prev_small_special->second.second;
        }
    }
    if (value_ptr != nullptr) {
        // Fallback details are keyed by the stored value's address.
        _ASSERT(old_name != nullptr  &&  !old_name->empty());
        fallbacks[value_ptr] = make_pair(*old_name, new_name);
    } else {
        _ASSERT(old_name == nullptr);
    }
}

// Split an accession into a leading portion and a numeric offset suitable
// for indexing a small-specials bit vector.
//
// acc: in: the full accession; out: truncated to the leading portion
//      (everything before the digits that were converted).
// fmt: format code; high 16 bits = prefix length, low 16 bits = digits.
//
// Returns the numeric value of the trailing digits that were split off.
// At most kMaxSmallSpecialDigits digits are used, or two fewer than the
// digit count when that count is exactly kMaxSmallSpecialDigits + 1.
// Accessions one character longer than the format's normal size are
// expected (asserted in debug builds) to have a letter two characters
// into the digit portion; that letter is skipped and, when no digits were
// trimmed, the two digits before it are folded into the result.
// Returns 0 if the trailing portion cannot be parsed as a number
// (assuming NStr::StringToNumeric leaves its output untouched on
// failure -- previously the result was indeterminate in that case).
bm::bvector_size_type SAccGuide::x_SplitSpecial(CTempString& acc,
                                                TFormatCode fmt)
{
    auto raw_digits = fmt & 0xffff, digits = raw_digits;
    auto normal_size = (fmt >> 16) + digits;
    if (digits == kMaxSmallSpecialDigits + 1) {
        digits -= 2;
    } else if (digits > kMaxSmallSpecialDigits) {
        digits = kMaxSmallSpecialDigits;
    }
    SIZE_TYPE pos;
    bm::bvector_size_type result = 0;
    if (acc.size() == normal_size) {
        pos = acc.size() - digits;
        NStr::StringToNumeric(acc.substr(pos), &result);
    } else {
        _ASSERT(acc.size() == normal_size + 1);
        _ASSERT(digits >= 3);
        pos = (fmt >> 16) + 2;
        _ASSERT(isalpha(static_cast<unsigned char>(acc[pos])));
        NStr::StringToNumeric(acc.substr(pos + 1), &result);
        if (digits == raw_digits) {
            // Nothing was trimmed, so the two digits before the letter
            // belong to the offset as its most significant part.
            pos -= 2;
            result += (NStr::StringToNumeric<Uint1>(acc.substr(pos, 2))
                       * kBVSizes[digits - 2]);
        }
    }
    acc.erase(pos);
    return result;
}

static CRef<SAccGuide>* s_CreateGuide(void)
{
    return new CRef<SAccGuide>(new SAccGuide);
}

// File-wide accession guide, wrapped in CSafeStatic with s_CreateGuide as
// its creation callback; the second argument (presumably a cleanup hook --
// confirm against CSafeStatic) is left NULL.
static CSafeStatic<CRef<SAccGuide> > s_Guide(s_CreateGuide, NULL);

// Classify a bare accession string, tolerating lowercase letters and an
// optional ".version" suffix.  The part before the dot is upcased and
// handed to x_IdentifyAccession; a suffix that is empty or not purely
// numeric yields eAcc_unknown.
CSeq_id::EAccessionInfo CSeq_id::IdentifyAccession(const CTempString& acc,
                                                   TParseFlags flags)
{
    const SIZE_TYPE dot_pos     = acc.find('.');
    const bool      has_version = (dot_pos != NPOS);
    if (has_version
        &&  (dot_pos >= acc.size() - 1
             ||  acc.find_first_not_of(kDigits, dot_pos + 1) != NPOS)) {
        return eAcc_unknown; // non-numeric "version"
    }
    const SIZE_TYPE main_size = has_version ? dot_pos : acc.size();

    static const SIZE_TYPE kMainAccBufSize = 32;
    if (main_size > kMainAccBufSize) {
        // Far longer than any standard format (as of January 2016), so
        // unlikely to be recognized; upcase a heap copy and try anyway.
        string main_acc(acc, 0, main_size);
        NStr::ToUpper(main_acc);
        return x_IdentifyAccession(main_acc, flags, has_version);
    }

    // Usual case: upcase into a small stack buffer, avoiding allocation.
    char                 upcased[kMainAccBufSize];
    const unsigned char* raw = (const unsigned char*)acc.data();
    for (SIZE_TYPE idx = 0;  idx < main_size;  ++idx) {
        upcased[idx] = toupper(raw[idx]);
    }
    CTempString main_acc(upcased, main_size);
    return x_IdentifyAccession(main_acc, flags, has_version);
}
     
// Core accession classifier.
//
// main_acc is the accession without any ".version" suffix; the public
// IdentifyAccession overload upcases it before calling here.  has_version
// tells whether such a suffix was present.  flags (fParse_*) select which
// raw interpretations are allowed: fParse_RawText enables the textual
// heuristics and the guide lookup, fParse_RawGI allows all-digit GIs.
// Returns eAcc_unknown when nothing matches.
CSeq_id::EAccessionInfo
CSeq_id::x_IdentifyAccession(const CTempString& main_acc, TParseFlags flags,
                             bool has_version)
{
    SIZE_TYPE digit_pos = main_acc.find_first_of(kDigits),
        main_size = main_acc.size();
    // Set to 'S' or 'P' below when such a letter follows the first two
    // digits and only digits come after it; '\0' otherwise.
    char flag_char = '\0';
    if (digit_pos == NPOS) {
        // No digits at all: nothing here can match.
        return eAcc_unknown;
    } else {
        SIZE_TYPE non_dig_pos = main_acc.find_first_not_of(kDigits, digit_pos);
        const unsigned char* ucdata = (const unsigned char*)main_acc.data();
        // Heuristics for IDs with a non-digit after the first digit run;
        // every path out of this block either returns or sets flag_char.
        if (non_dig_pos != NPOS  &&  (flags & fParse_RawText) != 0) {
            if ( !has_version  &&  digit_pos == 0  &&  main_size >= 4
                &&  non_dig_pos < 5  &&  isalnum(ucdata[1])
                &&  isalnum(ucdata[2])  &&  isalnum(ucdata[3])) {
                // Possible PDB (always unversioned); examine further
                // to avoid false positives.
                if (main_size > 4  &&  main_size <= 17
                    &&  strchr("|-_", main_acc[4])
                    &&  (main_size <= 6  ||  isalnum(ucdata[5]))) {
                    // Conventionally delimited
                    return eAcc_pdb; 
                } else switch (main_size) {
 /*
                case 7:
                    if ((main_acc[5] != main_acc[6]
                         &&  (main_acc[5] != 'V' || main_acc[6] != 'B'))
                        ||  !isalpha(ucdata[5])) {
                        break;
                    } // else fall through
                case 6:
                    // Be extra strict when the potential molecule ID
                    // could simply be a year.  (NB: *insisting* on a
                    // non-digit would rule out 1914|A, gi 157829621.)
                    if ((non_dig_pos < 4  &&  ispunct(ucdata[4]))) {
                        return eAcc_pdb;
                    }
                    break;
                case 5:
                    if ((flags & fParse_ValidLocal) == 0) {
                        break;
                    } // else fall through
 */
                // With the cases above disabled, only a bare
                // four-character ID is accepted here.
                case 4:
                    return eAcc_pdb;
                }
            }
            // Six-character pattern: letter, digit, three alphanumerics,
            // digit; the first letter must be O/P/Q unless position 2 is
            // a letter.
            if (digit_pos == 1  &&  main_size == 6
                &&  (main_acc[0] == 'O'  ||  main_acc[0] == 'P'
                     ||  main_acc[0] == 'Q' ||  isalpha(ucdata[2]))
                &&  isdigit(ucdata[1])  &&  isalnum(ucdata[2])
                &&  isalnum(ucdata[3])  &&  isalnum(ucdata[4])
                &&  isdigit(ucdata[5])) {
                return eAcc_swissprot;
            // Ten-character pattern, never starting with O/P/Q.
            } else if (digit_pos == 1  &&  main_size == 10
                       &&  main_acc[0] != 'O'  &&  main_acc[0] != 'P'
                       &&  main_acc[0] != 'Q'
                       &&  isalpha(ucdata[2])  &&  isalnum(ucdata[3])
                       &&  isalnum(ucdata[4])  &&  isdigit(ucdata[5])
                       &&  isalpha(ucdata[6])  &&  isalnum(ucdata[7])
                       &&  isalnum(ucdata[8])  &&  isdigit(ucdata[9])) {
                return eAcc_swissprot;                
            } else if ( !has_version  &&  digit_pos == 0
                       &&  (non_dig_pos == 6  ||  non_dig_pos == 7)
                       &&  (main_size == non_dig_pos + 1
                            ||  main_acc[non_dig_pos + 1] == ':'
                            ||  (isalpha(ucdata[non_dig_pos + 1])
                                 &&  (main_size == non_dig_pos + 2
                                      ||  main_acc[non_dig_pos + 2] == ':')))) {
                // A formal spec appears to be elusive, but all examples in ID
                // contain six or seven digits followed by one or two letters,
                // followed in some rare cases by a tag such as :PDB=...
                return eAcc_prf;
            // Prefix of at least four characters, two digits, then 'S' or
            // 'P' followed by at least five characters, all digits.
            } else if (digit_pos >= 4  &&  non_dig_pos == digit_pos + 2
                       &&  main_size - non_dig_pos >= 6  &&  main_acc[3] != '_'
                       &&  (main_acc[non_dig_pos] == 'S'
                            ||  main_acc[non_dig_pos] == 'P')
                       &&  (main_acc.find_first_not_of
                            (kDigits, non_dig_pos + 1) == NPOS)) {
                flag_char = main_acc[non_dig_pos];
            } else {
                return eAcc_unknown;
            }
        }
    }

    if (digit_pos == 0) {
        // Leading digit: only a bare GI (no version, no leading zero, all
        // digits) remains possible, and only if the caller allows it.
        if ((flags & fParse_RawGI) != 0  &&  !has_version
            &&  main_acc[0] != '0'
            &&  main_acc.find_first_not_of(kDigits) == NPOS) {
            return eAcc_gi; // just digits
        } else {
            return eAcc_unknown; // PDB already handled
        }
    } else if ((flags & fParse_RawText) == 0) {
        return eAcc_unknown;
    }

    // Look the accession up in the guide, keyed by prefix length and
    // digit count (the flag letter, if any, is not counted as a digit).
    SIZE_TYPE flag_len = (flag_char == '\0') ? 0 : 1;
    SIZE_TYPE digit_count = main_size - digit_pos - flag_len;
    auto& guide = *s_Guide;
    // Held by reference: its address is the key into guide->fallbacks.
    const EAccessionInfo& found_ai
        = guide->Find(SAccGuide::s_Key(digit_pos, digit_count), main_acc);
    EAccessionInfo ai = found_ai;
    // Strip the fAcc_specials and fAcc_fallback bits before returning.
    if ((ai & fAcc_specials) != 0) {
        ai = EAccessionInfo(ai & ~fAcc_specials);
    }
    if ((ai & fAcc_fallback) != 0) {
        ai = EAccessionInfo(ai & ~fAcc_fallback);
        // Warn about fallback use at most once (function-local static),
        // and not at all when the caller passed fParse_FallbackOK.
        static bool s_ReportedFallback;
        if ((flags & fParse_FallbackOK) == 0  &&  !s_ReportedFallback ) {
            // TODO - arrange to skip when only interested in the overall type
            s_ReportedFallback = true;
            auto it = guide->fallbacks.find(&found_ai);
            if (it != guide->fallbacks.end()) {
                ERR_POST_X(14, Warning << "CSeq_id::IdentifyAccession:"
                           " Returning fallback type "
                           << it->second.first << " for accession "
                           << main_acc << ".  (Preferred type "
                           << it->second.second << " unrecognized.)");
            } else {
                ERR_POST_X(15, Warning << "CSeq_id::IdentifyAccession:"
                           " Returning fallback type 0x"
                           << NStr::UIntToString(ai, 0, 16)
                           << " for accession " << main_acc
                           << ".  (Internal error looking up names of"
                           " fallback and preferred types.)");
            }
        }
    }
    // A 'P' flag letter marks the result as protein, keeping the type bits
    // and normalizing the division.
    if (flag_char == 'P') {
        switch (ai & eAcc_division_mask) {
        case eAcc_targeted:
        case eAcc_wgs:
        // case eAcc_wgs_intermed:
            ai = EAccessionInfo((ai & eAcc_type_mask) | eAcc_wgs | fAcc_prot);
            break;
        case eAcc_tsa:
            ai = EAccessionInfo((ai & eAcc_type_mask) | eAcc_tsa | fAcc_prot);
            break;
        default:
            ERR_POST_X(11,
                       Warning << main_acc
                       << ": Protein flag found with unexpected division "
                       << ((ai & eAcc_division_mask) >> 8));
            ai = EAccessionInfo((ai & (eAcc_type_mask | eAcc_division_mask))
                                | fAcc_prot);
            break;
        }
    }
    // For these divisions, add fAcc_master when everything from two
    // characters past the first digit onward is '0'.
    switch (ai & eAcc_division_mask) {
    case eAcc_targeted:
    case eAcc_tsa:
    case eAcc_wgs:
    case eAcc_wgs_intermed:
        if (digit_pos >= 4
            &&  (main_acc.find_first_not_of
                 ("0", digit_pos /* + flag_len */ + 2) == NPOS)) {
            return EAccessionInfo(ai | fAcc_master);
        }
        // otherwise falls through to default, which just leaves the switch
    default:
        break;
    }
    return ai;
}


// Classify this ID object.  The stored choice is authoritative for the
// overall type; for text IDs the accession string is additionally parsed
// to recover division and flag bits where the two agree.
CSeq_id::EAccessionInfo CSeq_id::IdentifyAccession(TParseFlags flags) const
{
    const E_Choice type = Which();
    switch (type) {
    case e_Pir:
    case e_Swissprot:
    case e_Prf:
        // Always just protein.  (e_Pdb is deliberately *not* listed.)
        return static_cast<EAccessionInfo>(type | fAcc_prot);

    case e_Genbank:
    case e_Embl:
    case e_Ddbj:
    case e_Tpg:
    case e_Tpe:
    case e_Tpd:
    case e_Other:
    case e_Gpipe:
    case e_Named_annot_track:
    {
        const CTextseq_id* text_id = GetTextseq_Id();
        if ( !text_id->IsSetAccession() ) {
            return static_cast<EAccessionInfo>(type);
        }
        // Go through the public overload rather than x_IdentifyAccession,
        // because the stored accession may contain lowercase letters.
        const EAccessionInfo parsed
            = IdentifyAccession(text_id->GetAccession(), flags);
        const E_Choice parsed_type = GetAccType(parsed);
        const auto     parsed_div  = parsed & eAcc_division_mask;

        if (parsed_type == e_not_set) {
            // Parsing found no type, but the choice tells us what it is.
            return static_cast<EAccessionInfo>
                ((parsed & eAcc_flag_mask) | type);
        }
        if (parsed_type == type) {
            return parsed;
        }
        if (type == e_Tpe  &&  parsed_type == e_Embl
            &&  (parsed_div == eAcc_other  ||  parsed_div == eAcc_wgs)) {
            // Keep everything parsed except the type, which is overridden.
            return static_cast<EAccessionInfo>
                ((parsed & ~eAcc_type_mask) | type);
        }
        // Misidentified or mislabeled; assume the former.
        return static_cast<EAccessionInfo>(type);
    }

    case e_General:
    {
        // Look the upcased database name up in the guide's general table.
        string db_name = GetGeneral().GetDb();
        NStr::ToUpper(db_name);
        auto& guide = *s_Guide;
        SAccGuide::TPrefixes::const_iterator hit
            = guide->general.find(db_name);
        if (hit == guide->general.end()) {
            return eAcc_general;
        }
        return hit->second;
    }

    default:
        return static_cast<EAccessionInfo>(type);
    }
}


void CSeq_id::LoadAccessionGuide(const string& filename)
{
    s_Guide->Reset(new SAccGuide(filename));
}

// Replace the shared accession guide with one constructed from the
// supplied line reader.
void CSeq_id::LoadAccessionGuide(ILineReader& in)
{
    CRef<SAccGuide>& shared_guide = *s_Guide;
    shared_guide.Reset(new SAccGuide(in));
}


static inline
void x_GetLabel_Type(const CSeq_id& id, string* label,
                     CSeq_id::TLabelFlags flags)
{
    unsigned choice = id.Which();
    _ASSERT(choice < CSeq_id::e_MaxChoice);
    if (choice >= CSeq_id::e_MaxChoice) {
        return;
    }

    switch (choice) {
    case CSeq_id::e_General:
        // we may encode 'gnl' or the database name as requested
        if (flags & CSeq_id::fLabel_GeneralDbIsContent) {
            *label += id.GetGeneral().GetDb();
        } else {
            *label += "gnl";
        }
        break;

    case CSeq_id::e_Patent:
        *label += "pat";
        break;

    default:
        *label += s_TextId[choice];
        break;
    }

    // no extra flag interpretation currently
}


static inline
void x_GetLabel_Content(const CSeq_id& id, string* label,
                        CSeq_id::TLabelFlags flags, int* version)
{
    const CTextseq_id* tsid = id.GetTextseq_Id();

    if (version != NULL) {
        *version = 0;
    }

    //text id
    if (tsid) {
        string str;
        if (tsid->IsSetAccession()) {
            str = tsid->GetAccession();
            NStr::ToUpper(str);
        } else if (tsid->IsSetName()) {
            str = tsid->GetName();
        }

        if (version != NULL && tsid->IsSetVersion()) {
            *version = tsid->GetVersion();
        }
        if ( !str.empty() ) {
            if ( (flags & CSeq_id::fLabel_Version)  &&  tsid->IsSetVersion()) {
                str += "." + NStr::IntToString(tsid->GetVersion());
            }
        }
        *label += str;

    } else { //non-text id
        switch (id.Which()) {
        case CSeq_id::e_not_set:
            break;

        case CSeq_id::e_Local:
            {{
                const CObject_id& oid = id.GetLocal();
                if (oid.IsId()) {
                    *label += NStr::IntToString(oid.GetId());
                } else if (oid.IsStr()) {
                    *label += oid.GetStr();
                }
            }}
            break;

        case CSeq_id::e_Gibbsq:
            *label += NStr::IntToString(id.GetGibbsq());
            break;

        case CSeq_id::e_Gibbmt:
            *label += NStr::IntToString(id.GetGibbmt());
            break;

        case CSeq_id::e_Giim:
            *label += NStr::IntToString(id.GetGiim().GetId());
            break;

        case CSeq_id::e_General:
            {{
                const CDbtag& dbt = id.GetGeneral();
                if ((flags & CSeq_id::fLabel_GeneralDbIsContent) == 0) {
                    *label += dbt.GetDb() + ':';
                }
                if (dbt.GetTag().IsId()) {
                    *label += NStr::IntToString(dbt.GetTag().GetId());
                } else if (dbt.GetTag().IsStr()) {
                    *label += dbt.GetTag().GetStr();
                }
            }}
            break;

        case CSeq_id::e_Patent:
            {{
                const CId_pat& idp = id.GetPatent().GetCit();
                *label += idp.GetCountry();
                // *label += '|';
                *label += (idp.GetId().IsNumber() ?
                           idp.GetId().GetNumber() :
                           idp.GetId().GetApp_number());
                *label += '_'; // |
                *label += NStr::IntToString(id.GetPatent().GetSeqid());
            }}
            break;

        case CSeq_id::e_Gi:
            *label += NStr::NumericToString(id.GetGi());
            break;

        case CSeq_id::e_Pdb:
            {{
                const CPDB_seq_id& pid = id.GetPdb();
                *label += pid.GetMol().Get();
                if (pid.IsSetChain_id()) {
                    *label += '_';
                    *label += pid.GetChain_id();
                } else if (pid.IsSetChain()) {
                    unsigned char chain = static_cast<unsigned char>(pid.GetChain());
                    if (chain > ' ') {
                        *label += '_';
                        // previously if: (islower(chain)) then doubled the upper-case version with: *label += string(SIZE_TYPE(2), static_cast<char>(toupper(chain)));
                        *label += static_cast<char>(chain);
                    }
                }
            }}
            break;

        default:
            break;
        }
    }
}


// Append a label for this ID to *label (existing content is kept).
//
// label - destination string; the call is a no-op when it is NULL.
// type  - eFasta (full FASTA string), eFastaContent (FASTA string without
//         the type tag), eBoth ("type|content"), eType, or eContent.
// flags - fLabel_* bits: fLabel_UpperCase and fLabel_Version affect eBoth
//         here, fLabel_Trimmed strips trailing '|' from the FASTA forms;
//         the rest are passed to the type/content helpers.
void CSeq_id::GetLabel(string* label, ELabelType type, TLabelFlags flags) const
{
    if ( !label ) {
        return;
    }

    switch (type) {
    case eFasta:
        *label += AsFastaString();
        break;

    case eFastaContent:
    {
        CNcbiOstrstream oss;
        x_WriteContentAsFasta(oss);
        *label += CNcbiOstrstreamToString(oss);
        break;
    }
        
    case eBoth:
        x_GetLabel_Type(*this, label, flags);
        *label += "|";
        if (flags & fLabel_UpperCase) {
            NStr::ToUpper(*label);
            // ID-5290 : This function may be called for primary or secondary 
            // Seq-ids (e.g. gis), so need to check both primary and secondary id
            // values returned from the ComposeOSLT function. In the latter case,
            // always look at the first secondary ID in the list (there's almost
            // always just one anyway).
            // CXX-10440 : Original default version of ComposeOSLT function returns
            // empty string for local ids, but in this context local Seq-ids must
            // be parsed, hence use a special flag.
            string primary_id;
            list<string> secondary_id_list;
            primary_id = ComposeOSLT(&secondary_id_list, fAllowLocalId);
            if (!primary_id.empty())
                *label += primary_id;
            else if (secondary_id_list.size() > 0)
                *label += *secondary_id_list.begin();
            if (flags & fLabel_Version) {
                const CTextseq_id* tsid = GetTextseq_Id();
                if (tsid && tsid->IsSetVersion())
                    *label += "." + NStr::IntToString(tsid->GetVersion());
            }
        } else {
            x_GetLabel_Content(*this, label, flags, NULL);
        }
        break;

    case eType:
        x_GetLabel_Type(*this, label, flags);
        break;

    case eContent:
        x_GetLabel_Content(*this, label, flags, NULL);
        break;
    }

    if ((flags & fLabel_Trimmed) != 0
        &&  (type == eFasta  ||  type == eFastaContent)) {
        // Stop once the label is empty: indexing size() - 1 on an empty
        // string wraps around and reads out of bounds, which happened when
        // nothing was appended or the label consisted solely of '|'.
        while ( !label->empty()  &&  label->back() == '|' ) {
            label->pop_back();
        }
    }
}

// Append a label for this ID to *label, reporting the version separately.
// No flags are applied.  *version is only touched when the content part is
// produced (eBoth / eContent); the FASTA forms and eType leave it alone.
void CSeq_id::GetLabel(string* label, int* version, ELabelType type) const
{
    if (label == NULL) {
        return;
    }

    if (type == eFasta) {
        *label += AsFastaString();
    } else if (type == eFastaContent) {
        CNcbiOstrstream content;
        x_WriteContentAsFasta(content);
        *label += CNcbiOstrstreamToString(content);
    } else {
        const bool both = (type == eBoth);
        if (both  ||  type == eType) {
            x_GetLabel_Type(*this, label, 0);
        }
        if (both) {
            *label += "|";
        }
        if (both  ||  type == eContent) {
            x_GetLabel_Content(*this, label, 0, version);
        }
    }
}



/* Content-only label for this ID; for text IDs the version is appended
   when with_version is true. */
string CSeq_id::GetSeqIdString(bool with_version) const
{
    const TLabelFlags label_flags = with_version ? fLabel_Version : 0;
    string seqid_str;
    GetLabel(&seqid_str, eContent, label_flags);
    return seqid_str;
}

// Content-only label for this ID, with the version (if any) delivered
// through *version rather than embedded in the string.
string CSeq_id::GetSeqIdString(int* version) const
{
    string seqid_str;
    this->GetLabel(&seqid_str, version, eContent);
    return seqid_str;
}


// Write this ID in FASTA form: a type tag, '|', then the content.  Two
// variants override the regular tag: patents whose ID is not a number
// get "pat|", and Swiss-Prot IDs with release "unreviewed" get "tr|".
void CSeq_id::WriteAsFasta(ostream& out)
    const
{
    if (IsPatent()  &&  !GetPatent().GetCit().GetId().IsNumber() ) {
        static const char kPatentTag[] = "pat|";
        out.write(kPatentTag, sizeof(kPatentTag) - 1);
    } else if (IsSwissprot()  &&  GetSwissprot().IsSetRelease()
               &&  GetSwissprot().GetRelease() == "unreviewed") {
        static const char kUnreviewedTag[] = "tr|";
        out.write(kUnreviewedTag, sizeof(kUnreviewedTag) - 1);
    } else {
        // Choices this code does not know about are treated as unset.
        unsigned tag_index = Which();
        if (tag_index >= e_MaxChoice) {
            tag_index = e_not_set;
        }
        const char* tag = s_TextId[tag_index];
        out.write(tag, strlen(tag));
        out.put('|');
    }

    x_WriteContentAsFasta(out);
}

// Write the content part of the FASTA form (everything after the type
// tag) by dispatching on the ID's choice.  Choices at or beyond
// e_MaxChoice are treated as unset and write nothing.
void CSeq_id::x_WriteContentAsFasta(ostream& out) const
{
    const unsigned raw_choice = Which();
    const unsigned choice
        = (raw_choice >= e_MaxChoice) ? unsigned(e_not_set) : raw_choice;

    switch (choice) {
    case e_not_set:
        break;

    // --- plain numeric / object IDs ---
    case e_Local:
        GetLocal().AsString(out);
        break;
    case e_Gibbsq:
        out << GetGibbsq();
        break;
    case e_Gibbmt:
        out << GetGibbmt();
        break;
    case e_Giim:
        out << (GetGiim().GetId());
        break;
    case e_Gi:
        out << GetGi();
        break;

    // --- database tag: db name, then the tag ---
    case e_General:
    {
        const CDbtag& dbtag = GetGeneral();
        // no Upcase per Ostell - Karl 7/2001
        out << (dbtag.GetDb()) << '|';
        dbtag.GetTag().AsString(out);
        break;
    }

    // --- members that render themselves ---
    case e_Genbank:
        GetGenbank().AsFastaString(out);
        break;
    case e_Embl:
        GetEmbl().AsFastaString(out);
        break;
    case e_Ddbj:
        GetDdbj().AsFastaString(out);
        break;
    case e_Pir:
        GetPir().AsFastaString(out);
        break;
    case e_Swissprot:
        GetSwissprot().AsFastaString(out);
        break;
    case e_Prf:
        GetPrf().AsFastaString(out);
        break;
    case e_Other:
        GetOther().AsFastaString(out);
        break;
    case e_Tpg:
        GetTpg().AsFastaString(out);
        break;
    case e_Tpe:
        GetTpe().AsFastaString(out);
        break;
    case e_Tpd:
        GetTpd().AsFastaString(out);
        break;
    case e_Gpipe:
        // version is not suppressed after all
        GetGpipe().AsFastaString(out /*, false */);
        break;
    case e_Named_annot_track:
        GetNamed_annot_track().AsFastaString(out);
        break;
    case e_Patent:
        GetPatent().AsFastaString(out);
        break;
    case e_Pdb:
        GetPdb().AsFastaString(out);
        break;

    default:
        out << "[UnknownSeqIdType]";
        break;
    }
}

// Return this ID's FASTA form (as produced by WriteAsFasta) as a string.
//
// When HAVE_THREAD_LOCAL is defined, a single thread_local stream is
// reused across calls and rewound before each use; otherwise a fresh
// stream is constructed per call.
const string CSeq_id::AsFastaString(void) const
{
#ifdef HAVE_THREAD_LOCAL
    thread_local static CNcbiOstrstream str;
    // Rewind so this call's output overwrites the previous call's.
    str.seekp(0);
#if NCBI_SHUN_OSTRSTREAM
    // In this configuration the buffered contents are also reset to empty.
    str.str("");
#endif

    // VS2017 needs this call presumably because the first time seekp(0) is
    // called on an empty stream and thus a failbit is set.
    str.clear();
#else
    CNcbiOstrstream str;
#endif
    WriteAsFasta(str);
    return CNcbiOstrstreamToString(str);
}


//
// GetStringDescr()
// Produce an ID description for a bioseq in the requested format:
//   eFormat_FastA              - FASTA-style ID string (GI plus best ID)
//   eFormat_ForceGI            - the first GI only, in FASTA form (gi|####)
//   eFormat_BestWithVersion    - label of the best-ranked ID, with version
//   eFormat_BestWithoutVersion - label of the best-ranked ID, no version
// Returns an empty string when nothing suitable is available.
//
string CSeq_id::GetStringDescr(const CBioseq& bioseq, EStringFormat fmt)
{
    if (fmt == eFormat_FastA) {
        CNcbiOstrstream fasta;
        WriteAsFasta(fasta, bioseq);
        return CNcbiOstrstreamToString(fasta);
    }

    // Rank by the nucleotide or protein scheme depending on the molecule.
    const bool    is_na   = bioseq.GetInst().GetMol() != CSeq_inst::eMol_aa;
    CRef<CSeq_id> best_id = FindBestChoice(bioseq.GetId(),
                                           is_na ? CSeq_id::FastaNARank
                                           : CSeq_id::FastaAARank);

    if (fmt == eFormat_ForceGI) {
        // First GI found wins.
        for (const auto& id : bioseq.GetId()) {
            if (id->IsGi()) {
                CNcbiOstrstream gi_text;
                id->WriteAsFasta(gi_text);
                return CNcbiOstrstreamToString(gi_text);
            }
        }
    } else if (fmt == eFormat_BestWithVersion) {
        if (best_id.NotEmpty()) {
            string best_label;
            best_id->GetLabel(&best_label, eDefault, fLabel_Version);
            return best_label;
        }
    } else if (fmt == eFormat_BestWithoutVersion) {
        if (best_id.NotEmpty()) {
            string best_label;
            best_id->GetLabel(&best_label, eDefault, 0);
            return best_label;
        }
    }

    // Unknown format, or no matching ID.
    return "";
}

// Write a FASTA-style ID string for bioseq to ostr: the first GI (if
// any), then the best-ranked ID unless that is itself a GI, with '|'
// between the two when both are written.  Returns ostr.
CNcbiOstream& CSeq_id::WriteAsFasta(CNcbiOstream& ostr, const CBioseq& bioseq)
{
    const bool    is_na   = bioseq.GetInst().GetMol() != CSeq_inst::eMol_aa;
    CRef<CSeq_id> best_id = FindBestChoice(bioseq.GetId(),
                                           is_na ? CSeq_id::FastaNARank
                                           : CSeq_id::FastaAARank);

    // Overall shape: gi|###|SOME_ACCESSION|title
    bool wrote_gi = false;
    for (const auto& id : bioseq.GetId()) {
        if ( !id->IsGi() ) {
            continue;
        }
        id->WriteAsFasta(ostr);
        wrote_gi = true;
        break;
    }

    const bool have_non_gi_best = best_id.NotEmpty()  &&  !best_id->IsGi();
    if (have_non_gi_best) {
        if (wrote_gi) {
            ostr << '|';
        }
        best_id->WriteAsFasta(ostr);
    }

    return ostr;
}


// Construct from a Dbtag; all work is delegated to Set(dbtag, ...).
CSeq_id::CSeq_id(const CDbtag& dbtag, bool set_as_general)
{
    this->Set(dbtag, set_as_general);
}

// Initialize this ID from a Dbtag.
//
// EMBL and DDBJ tags become the corresponding text IDs, GI tags become a
// GI.  Any other database is stored as a general ID when set_as_general
// is true and otherwise rejected with CSeqIdException (eFormat).  A tag
// that is neither a string nor an integer is rejected the same way.
CSeq_id& CSeq_id::Set(const CDbtag& dbtag, bool set_as_general)
{
    int         version = -1;
    CTempString acc;
    string      accver;

    // Render the tag as text, whichever representation it uses.
    const CObject_id& tag = dbtag.GetTag();
    switch (tag.Which()) {
    case CObject_id::e_Id:
        accver = NStr::IntToString(tag.GetId());
        acc = accver;
        break;
    case CObject_id::e_Str:
        accver = tag.GetStr();
        s_SplitVersion(accver, acc, version);
        break;
    default:
        NCBI_THROW(CSeqIdException, eFormat,
                   "Bad CDbtag tag type "
                   + CObject_id::SelectionName(tag.Which()));
        break;
    }

    const CDbtag::EDbtagType db_type = dbtag.GetType();
    if (db_type == CDbtag::eDbtagType_EMBL) {
        SetEmbl().Set(accver);
    } else if (db_type == CDbtag::eDbtagType_DDBJ) {
        SetDdbj().Set(accver);
    } else if (db_type == CDbtag::eDbtagType_GI) {
        if (tag.IsStr()) {
            Set(e_Gi, tag.GetStr());
        } else {
            SetGi(GI_FROM(CObject_id::TId, tag.GetId()));
        }
    } else if (set_as_general) {
        // eDbtagType_bad and anything else not understood as a sequence ID
        SetGeneral().Assign(dbtag);
    } else {
        NCBI_THROW(CSeqIdException, eFormat,
                   "Unrecognized Dbtag DB " + dbtag.GetDb());
    }

    return *this;
}

// Map a FASTA tag onto a type variant: "tr" for Swiss-Prot gives eTV_tr,
// "pgp" for patents gives eTV_pgp (both case-insensitive); every other
// combination is eTV_plain.
inline
CSeq_id::ETypeVariant CSeq_id::x_IdentifyTypeVariant(CSeq_id::E_Choice type,
                                                     const CTempString& str)
{
    if (type == CSeq_id::e_Swissprot  &&  NStr::EqualNocase(str, "tr")) {
        return eTV_tr;
    }
    if (type == CSeq_id::e_Patent  &&  NStr::EqualNocase(str, "pgp")) {
        return eTV_pgp;
    }
    return eTV_plain;
}

// SeqId FASTA constructors
// Construct from a FASTA-style ID or bare accession; see Set(the_id, flags).
CSeq_id::CSeq_id(const CTempString& the_id, TParseFlags flags)
{
    this->Set(the_id, flags);
}

// Initialize this ID from a string.
//
// Surrounding whitespace is ignored.  Unless fParse_NoFASTA is set, the
// string is first checked for a FASTA type tag and, if one is found,
// parsed as '|'-separated FASTA pieces.  Otherwise it is treated as a bare
// ID: classified via IdentifyAccession when any raw-parsing flag is set,
// then tried as "db:tag" for supported databases, then as a local ID if
// the flags permit.  Throws CSeqIdException (eFormat) for empty or
// unparseable input, and for extra non-empty FASTA pieces unless
// fParse_PartialOK is set.
CSeq_id& CSeq_id::Set(const CTempString& the_id_in, TParseFlags flags)
{
    CTempString the_id = NStr::TruncateSpaces_Unsafe(the_id_in,
                                                     NStr::eTrunc_Both);
    E_Choice    type   = e_not_set;

    if ((flags & fParse_NoFASTA) == 0) {
        type = s_CheckForFastaTag(the_id);
    }
    if (type == e_not_set) {
        if (the_id.empty()) {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Empty bare accession supplied");
        }
        // If no (attempt at a) valid tag, tries to interpret the string
        // as a pure accession.
        if ((flags & fParse_AnyRaw) != 0) {
            // fParse_FallbackOK is added for this lookup only.
            type = GetAccType(IdentifyAccession(the_id,
                                                flags | fParse_FallbackOK));
        }
        // Every case below leaves the function (return or throw).
        switch (type) {
        case e_Gi:
            return Set(type, the_id);
        case e_not_set:
        {
            // Check for general IDs, albeit only with well-known
            // database names like SRA.
            SIZE_TYPE colon_pos = the_id.find(':');
            if (colon_pos != NPOS) {
                string db = the_id.substr(0, colon_pos);
                NStr::ToUpper(db);
                // const auto& whitelist = (*s_Guide)->general;
                const auto& whitelist = kSupportedRawDbtags;
                if (whitelist.find(db) != whitelist.end()) {
                    // Reextract prefix to preserve case.
                    return Set(e_General, the_id.substr(0, colon_pos),
                               the_id.substr(colon_pos + 1));
                }                
            }
            // Last resort: a local ID, accepted outright under
            // fParse_AnyLocal and otherwise only if it validates.
            if ((flags & fParse_ValidLocal) != 0
                &&  ((flags & fParse_AnyLocal) == fParse_AnyLocal
                     ||  IsValidLocalID(the_id))) {
                return Set(e_Local, the_id);
            } else {
                NCBI_THROW(CSeqIdException, eFormat,
                           "Malformatted ID " + string(the_id));
            }
        }
        case e_Prf:
            // technically a name/locus, not an accession!
            return Set(type, kEmptyStr, the_id);
        case e_Pdb:
        {
            // First four characters are the molecule; the chain is
            // whatever follows the delimiter at index 4, or that single
            // fifth character itself when it is not '|'.
            string mol(the_id, 0, 4), chain;
            // NStr::SplitInTwo(the_id, "|", mol, chain);
            if (the_id.size() > 5) {
                chain = the_id.substr(5);
            } else if (the_id.size() == 5  &&  the_id[4] != '|') {
                chain = the_id[4];
            }
            return Set(type, mol, chain);
        }
        default:
        {
            // Ordinary text ID: split off the version and set both.
            CTempString acc;
            int         ver;
            s_SplitVersion(the_id, acc, ver);
            return Set(type, acc, kEmptyStr, ver);
        }
        }
    } else {
        // FASTA-style: the first piece is the tag (and picks the type
        // variant); x_Init takes what it needs from the remainder.
        list<CTempString> fasta_pieces;
        NStr::Split(the_id, "|", fasta_pieces);
        ETypeVariant tv = x_IdentifyTypeVariant(type, fasta_pieces.front());
        fasta_pieces.pop_front();
        x_Init(fasta_pieces, type, tv);
        if ( !fasta_pieces.empty() ) {
            // tolerate trailing parts if they're all empty.
            // NOTE(review): under fParse_PartialOK the warning is posted
            // once per non-empty leftover piece, not once per ID.
            ITERATE(list<CTempString>, it, fasta_pieces) {
                if ( !it->empty() ) {
                    if ((flags & fParse_PartialOK) != 0) {
                        ERR_POST_X(10, Warning << "Ignoring extra parts"
                                   " (synonyms?) in FASTA-style ID "
                                   << the_id);
                    } else {
                        NCBI_THROW(CSeqIdException, eFormat,
                                   "FASTA-style ID " + string(the_id)
                                   + " has too many parts.");
                    }
                }
            }
        }
        return *this;
    }
}


// Construct from an explicit type plus FASTA-style content; delegates to
// the matching Set overload.
CSeq_id::CSeq_id(EFastaAsTypeAndContent f, E_Choice the_type,
                 const CTempString& the_content)
{
    this->Set(f, the_type, the_content);
}

// Initialize from an explicit type plus '|'-separated FASTA content.  If
// the content starts with a tag that maps to the_type, that tag is
// consumed and used to pick the type variant; otherwise the plain variant
// is assumed.  The first parameter only selects this overload.
CSeq_id& CSeq_id::Set(EFastaAsTypeAndContent f, E_Choice the_type,
                      const CTempString& the_content)
{
    list<CTempString> pieces;
    NStr::Split(the_content, "|", pieces);

    ETypeVariant variant = eTV_plain; // unless a leading tag says otherwise
    const bool leads_with_tag
        = !pieces.empty()  &&  WhichInverseSeqId(pieces.front()) == the_type;
    if (leads_with_tag) {
        variant = x_IdentifyTypeVariant(the_type, pieces.front());
        pieces.pop_front();
    }

    x_Init(pieces, the_type, variant);
    return *this;
}


bool CSeq_id::IsValidLocalID(const CTempString& s)
{
    return (fNoError == CheckLocalID(s));
}


// Check s for use as a local ID and return the set of problems found.
//
// A blank string yields fEmptyId alone.  Otherwise the result combines
// fExceedsMaxLength (longer than kMaxLocalIDLength) and fInvalidChar
// (contains a space or one of >[]|"), or is fNoError if neither applies.
CSeq_id::TErrorFlags
CSeq_id::CheckLocalID(const CTempString& s)
{
    if (NStr::IsBlank(s)) {
        return fEmptyId;
    }

    static const char* kForbidden = " >[]|\"";
    CSeq_id_find_pred is_forbidden;
    is_forbidden.kSymbols = kForbidden;

    const bool too_long = s.length() > kMaxLocalIDLength;
    const bool has_bad_char =
        find_if(s.begin(), s.end(), is_forbidden) != s.end();

    TErrorFlags result = fNoError;
    if (too_long) {
        result |= fExceedsMaxLength;
    }
    if (has_bad_char) {
        result |= fInvalidChar;
    }
    return result;
}


// Convenience front end to ParseIDs(): always parses with
// fParse_RawText | fParse_AnyLocal, adding fParse_PartialOK when
// allow_partial_failure is true.  Returns whatever ParseIDs() returns.
SIZE_TYPE CSeq_id::ParseFastaIds(CBioseq::TId& ids, const CTempString& s,
                                 bool allow_partial_failure)
{
    const TParseFlags base_flags = fParse_RawText | fParse_AnyLocal;
    return ParseIDs(ids, s,
                    allow_partial_failure ? (base_flags | fParse_PartialOK)
                                          : base_flags);
}

// Parse s, which may hold a single ID or a '|'-delimited FASTA-style
// sequence of IDs, and append the IDs built from it to ids.
//
// ids    container the parsed IDs are appended to.
// s      text to parse; leading and trailing whitespace is dropped first.
// flags  parse flags.  With fParse_PartialOK set, a failure on one ID is
//        logged as a warning and parsing resumes further along, provided
//        unparsed pieces remain; otherwise the exception propagates.
//
// Returns the number of IDs appended (0 when s is blank).
SIZE_TYPE CSeq_id::ParseIDs(CBioseq::TId& ids, const CTempString& s,
                            TParseFlags flags)
{
    CTempString ss = NStr::TruncateSpaces_Unsafe(s, NStr::eTrunc_Both);
    if (ss.empty()) {
        return 0;
    }

    // first simple check to make it faster: with no FASTA tag the whole
    // string goes to the single-ID constructor with fParse_NoFASTA added.
    if (!s_HasFastaTag(ss)) {
        CRef<CSeq_id> id(new CSeq_id(ss, flags | fParse_NoFASTA));
        ids.push_back(id);
        return 1;
    }

    SIZE_TYPE count = 0;
    list<CTempString> fasta_pieces;
    NStr::Split(ss, "|", fasta_pieces);
    _ASSERT(fasta_pieces.size() > 0);
    if (fasta_pieces.size() == 1)
    {
        // Only one piece after splitting: again treated as a single
        // non-FASTA ID.
        CRef<CSeq_id> id(new CSeq_id(ss, flags | fParse_NoFASTA));
        ids.push_back(id);
        count = 1;
    }
    else
    {
        // Identify the leading type tag.  A recognized tag is removed from
        // the list; an unrecognized one is left in place.
        E_Choice     type = WhichInverseSeqId(fasta_pieces.front());
        ETypeVariant tv;
        if (type == e_not_set) {
            if (fasta_pieces.size() == 2) {
                // unknown database are reported as 'general'
                // (the unrecognized tag then serves as the first field)
                type = e_General;
            }
            tv   = eTV_plain;
        } else {
            tv = x_IdentifyTypeVariant(type, fasta_pieces.front());
            fasta_pieces.pop_front();
        }
        // Each pass builds at most one ID from the front of the list and
        // then determines the type of the ID that follows it.
        while ( !fasta_pieces.empty() ) {
            try {
                CRef<CSeq_id> id(new CSeq_id);
                if (type != e_not_set) {
                    // x_Init consumes this ID's fields and reports the next
                    // ID's type if it happened to detect it.
                    type = id->x_Init(fasta_pieces, type, tv);
                }
                if (type == e_not_set  &&  !fasta_pieces.empty() ) {
                    // Next type not known yet: the front piece must be a
                    // type tag.
                    type = WhichInverseSeqId(fasta_pieces.front());
                    if (type == e_not_set) {
                        // Drop the offending tag before throwing so that
                        // the recovery code below starts after it.
                        CTempString typestr = fasta_pieces.front();
                        fasta_pieces.pop_front();
                        NCBI_THROW(CSeqIdException, eFormat,
                                   "Unsupported ID type " + typestr);
                    }
                }
                if (type != e_not_set) {
                    // Consume the tag of the upcoming ID.
                    _ASSERT( !fasta_pieces.empty() );
                    tv = x_IdentifyTypeVariant(type, fasta_pieces.front());
                    fasta_pieces.pop_front();
                }
                // NOTE(review): id is appended even when x_Init was skipped
                // above (type was e_not_set on entry to this pass, which
                // can happen after recovering from "Unsupported ID type");
                // in that case a default-constructed ID is stored --
                // confirm whether that is intended.
                ids.push_back(id);
                ++count;
            } catch (std::exception& e) {
                // Nothing left to resume with: report the failure even if
                // partial results are acceptable.
                if (fasta_pieces.empty()) {
                    throw;
                }
                if ((flags & fParse_PartialOK) != 0) {
                    ERR_POST_X(7, Warning << e.what());
                    // Skip ahead to the next piece that is 2 or 3
                    // characters long, i.e. as long as the tags checked for
                    // in x_Init.
                    // NOTE(review): when the exception came out of x_Init,
                    // type still holds the failed ID's type, so the next
                    // pass feeds that piece to x_Init as a field rather
                    // than re-identifying it as a tag -- confirm intended.
                    do {
                        auto l = fasta_pieces.front().size();
                        if (l != 2  &&  l != 3) {
                            fasta_pieces.pop_front();
                        } else {
                            break;
                        }
                    } while ( !fasta_pieces.empty() );
                } else {
                    throw;
                }
            }
        }
    }
    return count;
}


// Initialize this ID, of the given type, from the leading pieces of a
// '|'-split FASTA-style ID.
//
// fasta_pieces  pieces still to be parsed, starting with this ID's first
//               field (its type tag must already have been removed).  The
//               pieces consumed here, and any empty pieces directly after
//               them, are popped from the list.
// type          kind of ID to build; must not be e_not_set.
// tv            variant of the type tag that introduced the ID; it selects
//               "unreviewed" vs. "reviewed" for Swiss-Prot and "pgp" vs.
//               "pat" for patents.
//
// Returns the type of the following ID if its tag was recognized while
// looking for an optional field, e_not_set otherwise.
// Throws CSeqIdException (eFormat) when required fields are missing, a
// patent sequence number is not a non-negative integer, or a PDB ID is
// malformed; exceptions from Set() propagate as well.
CSeq_id::E_Choice CSeq_id::x_Init(list<CTempString>& fasta_pieces,
                                  E_Choice type, ETypeVariant tv)
{
    _ASSERT(!fasta_pieces.empty());
    _ASSERT(type != e_not_set);

    // fields[0..2] end up as the acc, name and rel arguments of Set().
    vector<CTempString> fields(3);
    SIZE_TYPE   min_fields, max_fields;
    E_Choice    next_type = e_not_set;
    switch (type) {
    case e_Local:
    case e_Gibbsq:
    case e_Gibbmt:
    case e_Giim:
    case e_Gi:
        min_fields = max_fields = 1;
        break;
    case e_Patent:
        min_fields = max_fields = 3;
        break;
    case e_General:
        min_fields = max_fields = 2;
        break;
#if 0 // release no longer used
    case e_Other:
        min_fields = 1;
        max_fields = 3;
        break;
#endif
    default: // text seqid: accession and optional name
        min_fields = 1;
        max_fields = 2;
        break;
    }

    for (SIZE_TYPE i = 0;  i < max_fields;  ++i) {
        if (fasta_pieces.empty()) {
            if (i >= min_fields) {
                break;
            } else {
                NCBI_THROW(CSeqIdException, eFormat,
                           "Not enough fields for ID of type "
                           + string(s_TextId[type]));
            }
        } else {
            // Only optional fields can be mistaken for the next ID's tag.
            if (i >= min_fields  &&  fasta_pieces.size() > 1
                &&  (fasta_pieces.front().size() == 2
                     ||  fasta_pieces.front().size() == 3)
                &&  ((next_type = WhichInverseSeqId(fasta_pieces.front()))
                     != e_not_set)) {
                // Likely mid-string optional-field omission;
                // conservatively treat as such only if unable to
                // parse the following piece as an ID type, though.
                list<CTempString>::iterator it = fasta_pieces.begin();
                ++it;
                _ASSERT(it != fasta_pieces.end());
                E_Choice next_type_2;
                if ((it->size() == 2  ||  it->size() == 3)
                    &&  (next_type_2 = WhichInverseSeqId(*it)) != e_not_set) {
                    // The following piece is a tag too, so the front piece
                    // is taken as this ID's field after all.
                    next_type = next_type_2;
                } else {
                    break;
                }
            }
            fields[i] = fasta_pieces.front();
            fasta_pieces.pop_front();
        }
    }

    // Special case -- dbSNP IDs have historically contained internal
    // vertical bars, so we have to parse them greedily.
    string snp_name; // must survive until the end of the function
    if (type == e_General  &&  NStr::EqualNocase(fields[0], "dbSNP")
        &&  !fasta_pieces.empty() ) {
        snp_name = string(fields[1]) + '|' + NStr::Join(fasta_pieces, "|");
        fields[1] = snp_name;
        fasta_pieces.clear();
    }

    // Clear out extra empty pieces
    while ( !fasta_pieces.empty()  &&  fasta_pieces.front().empty() ) {
        fasta_pieces.pop_front();
    }

    int ver = 0;
    switch (type) {
    case e_Swissprot:
        if (tv == eTV_tr) {
            fields[2] = "unreviewed";
        } else {
            fields[2] = "reviewed";
        }
        break;

    case e_Patent:
        // "version" actually sequence number within patent, but whatever...
        ver = NStr::StringToNonNegativeInt(fields[2]);
        if (ver < 0) {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Bad sequence number " + string(fields[2]) + " for "
                       + string(fields[0]) + " patent " + string(fields[1]));
        }
        // to distinguish applications from granted patents; the numeric
        // content has already made its way into ver.  Set() stores the
        // number as an application number only when this field reads
        // "pgp", so it has to follow the tag variant rather than being
        // fixed to "pat".
        // NOTE(review): assumes a non-plain variant is reported for a
        // patent only when its tag names an application -- confirm against
        // x_IdentifyTypeVariant.
        if (tv == eTV_plain) {
            fields[2] = "pat";
        } else {
            fields[2] = "pgp";
        }
        break;

    case e_Pdb:
        // Reject molecule fields shorter than 4 characters, and fields
        // longer than 5 unless no chain field was given and the fifth
        // character is one of the separators | - _.
        if (fields[0].size() < 4
            ||  (fields[0].size() > 5
                 &&  ( !fields[1].empty()
                       ||  strchr("|-_", fields[0][4]) == NULL))) {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Malformatted PDB ID " + string(fields[0]));
        }
        if (fields[0].size() > 4  &&  fields[1].empty()) { // misdelimited
            if (fields[0].size() > 5) {
                // molecule + separator + chain: skip the separator
                fields[1] = fields[0].substr(5);
            } else {
                // molecule directly followed by a one-character chain
                _ASSERT(fields[0][4] != '|');
                fields[1] = fields[0].substr(4);
            }
            fields[0] = fields[0].substr(0, 4);
        }
        break;

    default:
        break; // avoid compiler warnings
    }

    Set(type, fields[0] /* acc */, fields[1] /* name */, ver,
        fields[2] /* rel */);

    return next_type;
}


// Construct a numeric ID of type the_type; delegates to
// Set(E_Choice, TIntId).
CSeq_id::CSeq_id(E_Choice the_type, TIntId the_id)
{
    Set(the_type, the_id);
}

#ifdef NCBI_STRICT_GI
// Overload compiled only when NCBI_STRICT_GI is defined: converts gi to
// TIntId with GI_TO and delegates to Set(E_Choice, TIntId).
CSeq_id::CSeq_id(E_Choice the_type, TGi gi)
{
    Set(the_type, GI_TO(TIntId, gi));
}
#endif

// Set this ID to the numeric ID the_id of type the_type.
//
// the_type  one of e_Local, e_Gibbsq, e_Gibbmt, e_Giim or e_Gi.
// the_id    numeric value; must not be negative, and must not be zero
//           for e_Local.
//
// Returns *this.  Throws CSeqIdException (eFormat) for an unacceptable
// the_id or for any other the_type.
CSeq_id& CSeq_id::Set(E_Choice the_type, TIntId the_id)
{
// see CSeq_id::Set below, it prohibits lcl|0, but allows gi|0
    if ((the_id < 0) || (the_type == e_Local && the_id == 0)) {
        NCBI_THROW(CSeqIdException, eFormat,
                   "Non-positive numeric ID " + NStr::NumericToString(the_id));
    }

    switch (the_type) {
    case e_Local:
        SetLocal().SetId(INT_ID_TO(CObject_id::TId, the_id));
        break;
    case e_Gibbsq:
        SetGibbsq(INT_ID_TO(CSeq_id::TGibbsq, the_id));
        break;
    case e_Gibbmt:
        SetGibbmt(INT_ID_TO(CSeq_id::TGibbmt, the_id));
        break;
    case e_Giim:
    {
        // Only the numeric part is kept; db and release are cleared.
        CGiimport_id& giim = SetGiim();
        giim.SetId(INT_ID_TO(CGiimport_id::TId, the_id));
        giim.ResetDb();
        giim.ResetRelease();
        break;
    }
    case e_Gi:
        SetGi(GI_FROM(TIntId, the_id));
        break;
    default:
        // Keep a space before the type name so that the message reads
        // "... ID type <name>", like the other messages in this file.
        NCBI_THROW(CSeqIdException, eFormat,
                   "Invalid numeric ID type " + SelectionName(the_type));
    }
    return *this;
}


// Construct an ID of type the_type from its textual components; delegates
// to the Set() overload taking the same five arguments.
CSeq_id::CSeq_id(E_Choice           the_type,
                 const CTempString& acc_in,
                 const CTempString& name_in,
                 int                version,
                 const CTempString& release_in)
{
    Set(the_type, acc_in, name_in, version, release_in);
}

// Karl Sirotkin 7/2001

// Set this ID to type the_type from its textual components.
//
// the_type    kind of ID to build.
// acc_in      primary field (surrounding whitespace is ignored): the
//             string or number of a local ID, the number of a
//             Gibbsq/Gibbmt/Giim/Gi ID, the accession of a text ID, the
//             country of a patent, the database of a general ID, or the
//             molecule of a PDB ID.
// name_in     secondary field: the name of a text ID, the (application)
//             number of a patent, the tag of a general ID, or the chain
//             of a PDB ID.
// version     version of a text ID, or sequence number within a patent.
// release_in  release of a text ID; for patents, "pgp" (in any case)
//             makes name_in an application number rather than a number.
//
// Returns *this.  Throws CSeqIdException (eFormat) for an unsupported
// the_type or an unparsable numeric ID.
CSeq_id& CSeq_id::Set(E_Choice           the_type,
                      const CTempString& acc_in,
                      const CTempString& name_in,
                      int                version,
                      const CTempString& release_in)
{
    CTempString  acc       = NStr::TruncateSpaces_Unsafe(acc_in,
                                                         NStr::eTrunc_Both);

    int          the_id;
    // tsid is set only for text IDs; the shared CTextseq_id::Set call at
    // the bottom fills those in.
    CTextseq_id* tsid      = 0;
    bool         allow_dot = true;

    switch (the_type) {
    case e_not_set: // Will cause unspecified SeqId to be returned.
        break;

    case e_Local:
        SetLocal().SetStrOrId(acc);
        break;

        // numeric IDs
    case e_Gibbsq:
    case e_Gibbmt:
    case e_Giim:
    case e_Gi:
#ifdef NCBI_INT8_GI
        // Try the full TGi range first; anything that fails to parse or is
        // not above ZERO_GI falls through to the generic int path below.
        if ( the_type == e_Gi ) {
            try {
                TGi gi = NStr::StringToNumeric<TGi>(acc);
                if ( gi > ZERO_GI ) {
                    SetGi(gi);
                    return *this;
                }
            }
            catch ( CException& /*ignored*/ ) {
                // will be processed by the code below
            }
        }
#endif
        // A negative result signals that acc could not be converted.
        if ( (the_id = NStr::StringToNonNegativeInt (acc)) >= 0 ) {
            return Set(the_type, the_id);
        } else {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Negative, excessively large, or non-numeric "
                       + SelectionName(the_type)
                       + " ID " + string(acc));
        }
        break;

        // text IDs (allow_dot is switched off for PIR and PRF)
    case e_Genbank:    tsid = &SetGenbank();    break;
    case e_Embl:       tsid = &SetEmbl();       break;
    case e_Pir:        tsid = &SetPir();        allow_dot = false;  break;
    case e_Swissprot:  tsid = &SetSwissprot();  break;
    case e_Other:      tsid = &SetOther();      break;
    case e_Ddbj:       tsid = &SetDdbj();       break;
    case e_Prf:        tsid = &SetPrf();        allow_dot = false;  break;
    case e_Tpg:        tsid = &SetTpg();        break;
    case e_Tpe:        tsid = &SetTpe();        break;
    case e_Tpd:        tsid = &SetTpd();        break;
    case e_Gpipe:      tsid = &SetGpipe();      break;
    case e_Named_annot_track:  tsid = &SetNamed_annot_track();  break;

    case e_Patent:
        {
            // acc -> country, name -> (application) number,
            // version -> sequence number within the patent.
            CTempString      name      =
                NStr::TruncateSpaces_Unsafe(name_in, NStr::eTrunc_Both);
            CTempString      release   =
                NStr::TruncateSpaces_Unsafe(release_in, NStr::eTrunc_Both);
            CPatent_seq_id&  pat       = SetPatent();
            CId_pat&         id_pat    = pat.SetCit();
            CId_pat::C_Id&   id_pat_id = id_pat.SetId();
            id_pat.SetCountry(acc);

            if (NStr::EqualNocase(release, "pgp")) {
                id_pat_id.SetApp_number(name);
            } else {
                id_pat_id.SetNumber(name);
            }
            id_pat.ResetDoc_type();
            pat.SetSeqid(version);
            break;
        }

    case e_General:
        {
            // acc -> database, name -> tag (string or number)
            CTempString name = NStr::TruncateSpaces_Unsafe(name_in,
                                                           NStr::eTrunc_Both);
            CDbtag&     dbt  = SetGeneral();
            dbt.SetDb(acc);
            dbt.SetTag().SetStrOrId(name);
            break;
        }

    case e_Pdb:
        {
            // acc -> molecule, name -> chain
            CTempString  name = NStr::TruncateSpaces_Unsafe(name_in,
                                                            NStr::eTrunc_Both);
            CPDB_seq_id& pdb  = SetPdb();
            pdb.SetMol().Set(acc);

            // Consult name_in in addition to name as whitespace
            // stripping can lose relevant information here.
            if (name_in.empty()) {
                pdb.ResetChain();
            } else if (name.empty()) {
                // name_in consisted of whitespace only: a blank chain
                pdb.SetChain(' ');
                name = " ";
            } else if (name.size() == 1) {
                pdb.SetChain(static_cast<unsigned char>(name[0]));
            } else {
                // Multi-character chains are kept in chain_id only.
                pdb.ResetChain();
                ERR_POST_X(16,
                           Info << "Necessarily using backwards-incompatible"
                           " representation for chain " << string(name)
                           << " of PDB molecule " << acc << '.');
            }
            if (name.empty()) {
                pdb.ResetChain_id();
            } else {
                pdb.SetChain_id(name);
            }
            pdb.ResetRel();
            break;
        }

    default:
        NCBI_THROW(CSeqIdException, eFormat,
                   "Unsupported Seq-id type " + SelectionName(the_type));
    }

    if (tsid) {
        // CTextseq_id::Set will take care of truncating any spaces.
        tsid->Set(acc, name_in, version, release_in, allow_dot);
    }

    return *this;
}


// Base score of this ID's type for text ranking; AdjustScore() adds to a
// score as a penalty, so smaller values rank better.
int CSeq_id::BaseTextScore(void) const
{
    int score;
    switch (Which()) {
    // Accession and accession-like ids - only one of them can be present
    // in a bioseq's list of ids, so their relative order does not matter.
    case e_Other:    case e_Swissprot:  case e_Pir:  case e_Pdb:
    case e_Genbank:  case e_Embl:       case e_Ddbj:
    case e_Tpg:      case e_Tpe:        case e_Tpd:
        score = 10;
        break;

    // Second group of mutually exclusive ids, any order can be used.
    case e_Gpipe:
    case e_Named_annot_track:
    case e_Prf:
        score = 20;
        break;

    // Patents share a score with local ids.
    // "local" < "general" < "gi"
    case e_Patent:
    case e_Local:
        score = 50;
        break;
    case e_General:
        score = 60;
        break;
    case e_Gi:
        score = PreferAccessionOverGi() ? kMaxScore + 1 : 70;
        break;

    case e_not_set:
        score = 100;
        break;

    // Obsolete ids, lowest rank, any order is OK.
    case e_Giim:
    case e_Gibbmt:
    case e_Gibbsq:
        score = 1000;
        break;

    // All other ids should go just above "not-set".
    default:
        score = 90;
        break;
    }
    return score;
}


// Base score of this ID's type for "best rank" selection.
int CSeq_id::BaseBestRankScore(void) const
{
    int score;
    switch (Which()) {
    case e_not_set:
        score = 83;
        break;
    case e_General:
    case e_Local:
        score = 80;
        break;
    case e_Gibbsq:
    case e_Gibbmt:
    case e_Giim:
        score = 70;
        break;
    case e_Named_annot_track:
        score = 69;
        break;
    case e_Gpipe:
        score = 68;
        break;
    case e_Patent:
        score = 67;
        break;
    case e_Other:
        score = 65;
        break;
    case e_Gi:
        score = PreferAccessionOverGi() ? kMaxScore + 1 : 51;
        break;
    default:
        score = 60;
        break;
    }
    return score;
}


int CSeq_id::BaseFastaNAScore(void) const
{
    switch (Which()) {
        // these few are bogus, at least for nucleotide sequences
    case e_not_set: case e_Giim:
    case e_Pir: case e_Swissprot: case e_Prf:  return 255;
    case e_Local:                              return 230;
    case e_Gi: return PreferAccessionOverGi() ? kMaxScore + 1 : 120;
    case e_General:
        {
        const string& db = GetGeneral().GetDb();
        if (db.compare("TMSMART") == 0 ||
            db.compare("BankIt") == 0 ||
            db.compare("NCBIFILE") == 0 )
            return 240;
        else
            return 100;
        }
    case e_Patent:                 return 90;
    case e_Pdb:                    return 80;
//  see SQD-4175 ticket for priorities 
    case e_Gibbsq:                 return 72;
    case e_Gibbmt:                 return 71;
    case e_Genbank:                return 70;
    case e_Other:                  return 15;
    default: /* [third party] GB/EMBL/DDBJ */  return 20;
    }
}


int CSeq_id::BaseFastaAAScore(void) const
{
    switch (Which()) {
    case e_not_set: case e_Giim:   return 255;
    case e_Local:                  return 230;
    case e_Gi: return PreferAccessionOverGi() ? kMaxScore + 1 : 120;
    case e_General:
        {
        const string& db = GetGeneral().GetDb();
        if (db.compare("TMSMART") == 0 ||
            db.compare("BankIt") == 0 ||
            db.compare("NCBIFILE") == 0)
            return 240;
        else
            return 90;
        }
    case e_Patent:                 return 80;
    case e_Prf:                    return 70;
    case e_Pdb:                    return 50;
//  see SQD-4175 ticket for priorities 
    case e_Gibbsq:                 return 42;
    case e_Gibbmt:                 return 41;
    case e_Genbank:                return 40;
    case e_Pir:                    return 30;
    case e_Swissprot:              return 20;
    case e_Other:                  return 15;
    default:                       return 60; // [third party] GB/EMBL/DDBJ
    }
}


int CSeq_id::BaseBlastScore(void) const
{
    switch (Which()) {
    case e_Other:                   return 10;
    case e_Swissprot:               return 20;
    case e_Pir:                     return 30;
    case e_Pdb:                     return 40;
    case e_Genbank:                 return 50;
    case e_Embl:                    return 60;
    case e_Ddbj:                    return 70;
    case e_Tpg:                     return 80;
    case e_Tpe:                     return 90;
    case e_Tpd:                     return 100;
    case e_Gpipe:                   return 120;
    case e_Named_annot_track:       return 130;
    case e_Prf:                     return 140;
    case e_Patent:                  return 150;
    case e_Gi: return PreferAccessionOverGi() ? kMaxScore + 1 : 160;
    case e_General:
    {
        const string& db = GetGeneral().GetDb();
        if (db.compare("TMSMART") == 0 ||
            db.compare("BankIt") == 0 ||
            db.compare("NCBIFILE") == 0)
            return 180;
        else
            return 170;
    }
    case e_Local:                   return 190;
    case e_not_set:                 return 250;
    case e_Giim:                    return 251;
    case e_Gibbmt:                  return 252;
    case e_Gibbsq:                  return 253;
    default:                        return 255;
    }
}


// Refine a per-type base score using the content of this ID.
//
// base_score  score for this ID's type, as returned by one of the Base*
//             scoring functions.
// flags       fRequireAccessions makes a text ID without an accession
//             score kMax_Int.
//
// Returns base_score * 10 plus small penalties: 5 for a general ID whose
// database is "TRACE"; for a text ID, 4 for a missing version, 3 for a
// missing accession and 2 for a missing name.
int CSeq_id::AdjustScore(int base_score, TAdjustScoreFlags flags) const
{
    int score = base_score * 10;
    if ( IsGeneral() ) {
        const string& db = GetGeneral().GetDb();
        if ( db == "TRACE" ) {
            // prefer "ti" over "TRACE"
            score += 5;
        }
    }
    else if ( const CTextseq_id* text_id = GetTextseq_Id() ) {
        const bool has_accession = text_id->IsSetAccession();
        if ( !has_accession  &&  (flags & fRequireAccessions) != 0 ) {
            // Already the worst possible score.  Return right away: adding
            // any further penalty to kMax_Int would overflow a signed int.
            return kMax_Int;
        }
        if ( !text_id->IsSetVersion() ) {
            score += 4;
        }
        if ( !has_accession ) {
            score += 3; // still penalize somewhat
        }
        if ( !text_id->IsSetName() ) {
            score += 2;
        }
    }
    return score;
}

// NOTE(review): placeholder -- neither ids nor flags is examined and
// false is returned for every input.
bool CSeq_id::IsValid(const CBioseq::TId& ids, TParseFlags flags)
{
    return false;
}

// NOTE(review): placeholder -- neither id nor flags is examined and
// false is returned for every input.
bool CSeq_id::IsValid(const CSeq_id& id, TParseFlags flags)
{
    return false;
}


// Switch match to the text-ID variant named by choice and return a pointer
// to the CTextseq_id it now holds.  For any choice that is not one of the
// text-ID types listed below, match is left alone and null is returned.
CTextseq_id* s_GetTextseq_id(const CSeq_id::E_Choice& choice, CSeq_id& match)
{
    CTextseq_id* text_id = nullptr;
    switch ( choice ) {
    case CSeq_id::e_Genbank:    text_id = &match.SetGenbank();    break;
    case CSeq_id::e_Embl:       text_id = &match.SetEmbl();       break;
    case CSeq_id::e_Pir:        text_id = &match.SetPir();        break;
    case CSeq_id::e_Swissprot:  text_id = &match.SetSwissprot();  break;
    case CSeq_id::e_Other:      text_id = &match.SetOther();      break;
    case CSeq_id::e_Ddbj:       text_id = &match.SetDdbj();       break;
    case CSeq_id::e_Prf:        text_id = &match.SetPrf();        break;
    case CSeq_id::e_Tpg:        text_id = &match.SetTpg();        break;
    case CSeq_id::e_Tpe:        text_id = &match.SetTpe();        break;
    case CSeq_id::e_Tpd:        text_id = &match.SetTpd();        break;
    case CSeq_id::e_Gpipe:      text_id = &match.SetGpipe();      break;
    case CSeq_id::e_Named_annot_track:
        text_id = &match.SetNamed_annot_track();
        break;
    default:
        break;
    }
    return text_id;
}


// For a text ID, insert into matches handles for IDs of the same type that
// carry only some of this ID's fields (accession, version, name, release).
// Does nothing when this ID is not a text ID.
//
// A single scratch ID (match) is edited in place between insertions, so
// the order of the Set/Reset calls below matters.
void CSeq_id::GetMatchingTextseqIds(TSeqIdHandles& matches) const
{
    const CTextseq_id* orig = GetTextseq_Id();
    if ( !orig ) return;

    // Capital letters flag accession/name, lower-case ones version/release;
    // the *v variables hold the corresponding values.
    bool A = orig->IsSetAccession();
    CTextseq_id::TAccession av = A ? orig->GetAccession() : kEmptyStr;
    bool v = orig->IsSetVersion();
    CTextseq_id::TVersion vv = v ? orig->GetVersion() : 0;
    bool N = orig->IsSetName();
    CTextseq_id::TName nv = N ? orig->GetName() : kEmptyStr;
    bool r = orig->IsSetRelease();
    CTextseq_id::TRelease rv = r ? orig->GetRelease() : kEmptyStr;

    // ti refers to the text-ID member of match, selected to have the same
    // type as this ID.
    CSeq_id match;
    CTextseq_id& ti = *s_GetTextseq_id(Which(), match);

    if (A  &&  (v  ||  N  ||  r)) {
        // Accession only
        ti.SetAccession(av);
        matches.insert(CSeq_id_Handle::GetHandle(match));
        if (v  &&  (N  ||  r)) {
            // A.v
            ti.SetVersion(vv);
            matches.insert(CSeq_id_Handle::GetHandle(match));
        }
        if ( N ) {
            // Name only
            ti.Reset();
            ti.SetName(nv);
            matches.insert(CSeq_id_Handle::GetHandle(match));
            if (v  ||  r) {
                if ( r ) {
                    // N.r
                    ti.SetRelease(rv);
                    matches.insert(CSeq_id_Handle::GetHandle(match));
                    ti.ResetRelease();
                }
                // A + N
                ti.SetAccession(av);
                matches.insert(CSeq_id_Handle::GetHandle(match));
                if (v  &&  r) {
                    // A.v + N
                    ti.SetVersion(vv);
                    matches.insert(CSeq_id_Handle::GetHandle(match));
                    // A + N.r
                    ti.ResetVersion();
                    ti.SetRelease(rv);
                    matches.insert(CSeq_id_Handle::GetHandle(match));
                }
            }
        }
    }
    else if (N  &&  (v  ||  r)) {
        // No accession here, just a name plus version and/or release.
        // N only
        ti.Reset();
        ti.SetName(nv);
        matches.insert(CSeq_id_Handle::GetHandle(match));
        if (v  &&  r) {
            // N.r
            ti.SetRelease(rv);
            matches.insert(CSeq_id_Handle::GetHandle(match));
        }
    }
}


// Insert into matches handles for alternative forms of this ID; what
// counts as an alternative depends on the ID type, and several types
// have none.
void CSeq_id::GetMatchingIds(TSeqIdHandles& matches) const
{
    switch ( Which() ) {
    // Text IDs (CTextseq_id) have their own routine.
    case CSeq_id::e_Genbank:
    case CSeq_id::e_Embl:
    case CSeq_id::e_Pir:
    case CSeq_id::e_Swissprot:
    case CSeq_id::e_Other:
    case CSeq_id::e_Ddbj:
    case CSeq_id::e_Prf:
    case CSeq_id::e_Tpg:
    case CSeq_id::e_Tpe:
    case CSeq_id::e_Tpd:
    case CSeq_id::e_Gpipe:
    case CSeq_id::e_Named_annot_track:
        GetMatchingTextseqIds(matches);
        break;

    case CSeq_id::e_Pdb:       // CPDB_seq_id
    {
        // 'rel' is optional, so the same ID without it also matches.
        if ( !GetPdb().IsSetRel() ) {
            break;
        }
        CSeq_id without_rel;
        without_rel.Assign(*this);
        without_rel.SetPdb().ResetRel();
        matches.insert(CSeq_id_Handle::GetHandle(without_rel));
        break;
    }

    case CSeq_id::e_General:   // CDbtag
    {
        if ( !GetGeneral().IsSetTag() ) {
            break;
        }
        CSeq_id alternative;
        if ( alternative.SetGeneral().SetAsMatchingTo(GetGeneral()) ) {
            matches.insert(CSeq_id_Handle::GetHandle(alternative));
        }
        break;
    }

    case CSeq_id::e_Local:     // CObject_id
    {
        CSeq_id alternative;
        if ( alternative.SetLocal().SetAsMatchingTo(GetLocal()) ) {
            matches.insert(CSeq_id_Handle::GetHandle(alternative));
        }
        break;
    }

    // Other types have no matching versions.
    case CSeq_id::e_not_set:
    case CSeq_id::e_Gibbsq:    // int
    case CSeq_id::e_Gibbmt:    // int
    case CSeq_id::e_Giim:      // CGiimport_id
    case CSeq_id::e_Patent:    // CPatent_seq_id
    case CSeq_id::e_Gi:        // TGi
        break;
    }
}


// Boolean parameter SeqId / PreferAccessionOverGi, false by default; read
// through TPreferAccessionOverGi in CSeq_id::PreferAccessionOverGi().
// NOTE(review): the last macro argument is presumably the name of the
// environment variable overriding the parameter -- confirm against the
// NCBI_PARAM documentation.
NCBI_PARAM_DECL(bool, SeqId, PreferAccessionOverGi);
NCBI_PARAM_DEF_EX(bool, SeqId, PreferAccessionOverGi, false, eParam_NoThread,
    SEQ_ID_PREFER_ACCESSION_OVER_GI);
typedef NCBI_PARAM_TYPE(SeqId, PreferAccessionOverGi) TPreferAccessionOverGi;

// Boolean parameter SeqId / AvoidGi, false by default; read through
// TAvoidGi in CSeq_id::AvoidGi().
NCBI_PARAM_DECL(bool, SeqId, AvoidGi);
NCBI_PARAM_DEF_EX(bool, SeqId, AvoidGi, false, eParam_NoThread,
    SEQ_ID_AVOID_GI);
typedef NCBI_PARAM_TYPE(SeqId, AvoidGi) TAvoidGi;


// True when the PreferAccessionOverGi parameter is on, or else when
// AvoidGi() is true.
bool CSeq_id::PreferAccessionOverGi(void)
{
    if (TPreferAccessionOverGi::GetDefault()) {
        return true;
    }
    return AvoidGi();
}


// Current default value of the AvoidGi parameter.
bool CSeq_id::AvoidGi(void)
{
    const bool avoid_gi = TAvoidGi::GetDefault();
    return avoid_gi;
}


// Compose the primary and secondary id strings for this ID.
//
// secondary_id_list  if non-null, receives the secondary id string
//                    (appended, upper-cased) whenever one was produced;
//                    several ID types compute a secondary id only when
//                    this list is supplied.
// parse_flags        fAllowLocalId enables a secondary id for local IDs;
//                    fGpipeAddSecondary makes Gpipe IDs use
//                    "accession.version" as their secondary id.
//
// Returns the primary id string, upper-cased except for the chain part of
// PDB IDs; the string is empty for ID types that only yield a secondary id.
string CSeq_id::ComposeOSLT(list<string>* secondary_id_list,
                            TComposeOSLTFlags parse_flags) const
{
    string primary_id;
    string secondary_id;
    E_Choice seqid_type = Which();
    // Set by the PDB branch to keep the final ToUpper away from primary_id.
    bool mixed_case = false;

    switch (seqid_type) {
    // CXX-11062 : gibbsq and gibbmt ids are sometimes primary, sometimes
    // secondary. Since it cannot be determined here which of the two is the case,
    // they are returned in both fields.
    // Use same logic for giim, but in fact there are no records in ID with this
    // Seq-id type at all.
    case e_Giim: 
        primary_id = NStr::IntToString(GetGiim().GetId());
        secondary_id = primary_id;
        break;
    case e_Gibbsq: 
        primary_id = NStr::IntToString(GetGibbsq());
        secondary_id = primary_id;
        break;
    case e_Gibbmt:
        primary_id = NStr::IntToString(GetGibbmt());
        secondary_id = primary_id;
        break;
    case e_Pir: 
    case e_Prf:
    {
        // This is a Textseq-id, however primary id is normally stored in the
        // name field.
        // For PIR, if name is empty, id is allowed to be placed in the accession field;
        // For PRF only name is allowed!
        const CTextseq_id* tsid = GetTextseq_Id();
        if (tsid->CanGetName())
            primary_id = tsid->GetName();
        else if (seqid_type == e_Pir && tsid->CanGetAccession())
            primary_id = tsid->GetAccession();
        break;
    }
    case e_Patent:
        if (secondary_id_list) {
            // All patents have GenBank Seq-ids, so id string derived from a patent
            // seqid is always secondary
            // Format: country|number-or-application-number|seqid
            const CId_pat& pat = GetPatent().GetCit();
            secondary_id = pat.GetCountry() + "|" +
                (pat.GetId().IsNumber() ?
                 pat.GetId().GetNumber() : pat.GetId().GetApp_number()) + "|" +
                NStr::IntToString(GetPatent().GetSeqid());
        }
        break;
    case e_Pdb:
    {
        const CPDB_seq_id& pdb = GetPdb();
        primary_id = pdb.GetMol().Get();
        // ID-5995 : Use FASTA-style "mol|chain" format as OSLT - with upper case
        // mol but mixed case chain values. This is how they are stored in Cassandra.
        NStr::ToUpper(primary_id);
        // chain_id takes precedence over the single-character chain, and a
        // blank chain is not appended.
        if (pdb.IsSetChain_id()) {
            primary_id += "|" + pdb.GetChain_id();
        } else if (pdb.IsSetChain() && pdb.GetChain() != ' ') {
            primary_id += "|" + string(1, (char)pdb.GetChain());
        }
        mixed_case = true;
        break;
    }
    case e_General:
    {
        // General ids are always secondary!
        if (secondary_id_list) {
            // Format: db|tag, skipped altogether when the tag text is empty.
            const CObject_id& dbtag = GetGeneral().GetTag();
            string suffix =
                (dbtag.IsId() ? NStr::IntToString(dbtag.GetId()) : dbtag.GetStr());
            if (!suffix.empty())
                secondary_id = GetGeneral().GetDb() + "|" + suffix;
        }
        break;
    }
    case e_Gi:
        // GIs are always secondary
        if (secondary_id_list) {
            secondary_id = NStr::NumericToString(GetGi());
        }
        break;
    case CSeq_id::e_Local:
    {
        // Local ids are reported (as secondary) only on request.
        if ((parse_flags & fAllowLocalId) != 0 && secondary_id_list) {
            const CObject_id& oid = GetLocal();
            if (oid.IsId()) {
                secondary_id = NStr::IntToString(oid.GetId());
            } else if (oid.IsStr()) {
                secondary_id = oid.GetStr();
            }
        }
        break;
    }
    default:
    {
        // In the logic below, any Textseq-id is treated as primary. However a
        // Bioseq object may contain multiple Textseq-ids in its list of Seq-ids, 
        // e.g. when RefSeq takes over a preexisting GPIPE record.
        const CTextseq_id* tsid = GetTextseq_Id();
        if (tsid) {
            if (tsid->CanGetAccession())
                primary_id = tsid->GetAccession();
            if ( secondary_id_list ) {
                // Gpipe with fGpipeAddSecondary: accession.version, with
                // version 1 assumed when none is set; otherwise the name,
                // if any, is the secondary id.
                if (seqid_type == e_Gpipe
                    &&  (parse_flags & fGpipeAddSecondary) != 0
                    &&  !primary_id.empty()) {
                    if ( tsid->IsSetVersion() )
                        secondary_id = primary_id + "." + to_string(tsid->GetVersion());
                    else
                        secondary_id = primary_id + ".1";
                }
                else if (tsid->CanGetName() && !tsid->GetName().empty()) {
                    secondary_id = tsid->GetName();
                }
            }
        }
        break;
    }
    }

    // Normalize case: the primary id unless a branch asked for mixed case,
    // the secondary id always.
    if (!mixed_case)
        NStr::ToUpper(primary_id);
    if (secondary_id_list && !secondary_id.empty()) {
        NStr::ToUpper(secondary_id);
        secondary_id_list->emplace_back(secondary_id);
    }
    return primary_id;
}


// Name of an SNP scale limit: "unit", "contig", "supercontig" or
// "chromosome"; every other value maps to the empty string.
const char* CSeq_id::GetSNPScaleLimit_Name(ESNPScaleLimit value)
{
    const char* name = "";
    switch (value) {
    case CSeq_id::eSNPScaleLimit_Unit:
        name = "unit";
        break;
    case CSeq_id::eSNPScaleLimit_Contig:
        name = "contig";
        break;
    case CSeq_id::eSNPScaleLimit_Supercontig:
        name = "supercontig";
        break;
    case CSeq_id::eSNPScaleLimit_Chromosome:
        name = "chromosome";
        break;
    default:
        break;
    }
    return name;
}


// SNP scale limit with the given name (exact, case-sensitive match on
// "unit", "contig", "supercontig" or "chromosome"); any other name yields
// eSNPScaleLimit_Default.
CSeq_id::ESNPScaleLimit CSeq_id::GetSNPScaleLimit_Value(const string& name)
{
    ESNPScaleLimit result = eSNPScaleLimit_Default;
    if (name == "unit") {
        result = eSNPScaleLimit_Unit;
    } else if (name == "contig") {
        result = eSNPScaleLimit_Contig;
    } else if (name == "supercontig") {
        result = eSNPScaleLimit_Supercontig;
    } else if (name == "chromosome") {
        result = eSNPScaleLimit_Chromosome;
    }
    return result;
}


/// Check whether the given SNP scale limit may be used with this Seq-id.
///
/// The limit is always accepted when it is eSNPScaleLimit_Default, when this
/// id is a GI, when the id has no Textseq-id, or when its Textseq-id lacks an
/// accession or a version.  Otherwise the accession is identified, and only
/// accessions whose type is e_Other are restricted: depending on the
/// accession's division a minimum limit is chosen, and a scale_limit that
/// compares less than that minimum is rejected.
///
/// @param scale_limit
///   Requested scale limit.
/// @return
///   false only if scale_limit is below the minimum for this id's accession
///   division; true in all other cases.
bool CSeq_id::IsAllowedSNPScaleLimit(ESNPScaleLimit scale_limit) const
{
    if (scale_limit == eSNPScaleLimit_Default) {
        return true;
    }
    if (IsGi()) {
        return true;
    }

    const CTextseq_id* const tsid = GetTextseq_Id();
    if ( !tsid ) {
        return true;
    }
    if ( !(tsid->IsSetAccession()  &&  tsid->IsSetVersion()) ) {
        return true;
    }

    const EAccessionInfo info = IdentifyAccession();
    if (GetAccType(info) != e_Other) {
        // Only accessions of type e_Other carry a restriction.
        return true;
    }

    // Smallest limit permitted for this accession's division.
    ESNPScaleLimit required;
    switch (info & eAcc_division_mask) {
    case eAcc_chromosome: // AC_ / NC_
        required = eSNPScaleLimit_Chromosome;
        break;
    case eAcc_wgs_intermed: // NW_
        required = eSNPScaleLimit_Supercontig;
        break;
    case eAcc_con: // NT_ / NZ_?(?)
        required = eSNPScaleLimit_Contig;
        break;
    default:
        required = eSNPScaleLimit_Unit;
        break;
    }
    return !(scale_limit < required);
}


/// Parse an accession or accession range of the form
/// PREFIX<digits>[-[PREFIX]<digits>].
///
/// The leading run of letters (and underscores, if fAllowUnderscores is set
/// in flags) becomes the prefix; the following run of decimal digits becomes
/// start, and its length is recorded in digits.  If the input ends there,
/// stop is set equal to start.  Otherwise a hyphen must follow, optionally
/// followed by a repetition of the same prefix, followed by exactly as many
/// decimal digits as the start value had; those digits become stop.
///
/// @param s
///   Text to parse.
/// @param flags
///   Parsing flags; only fAllowUnderscores is consulted here.
/// @throws CSeqIdException (eFormat)
///   If the character after the start number is not a hyphen, if a second
///   prefix is present but differs from the first, if the number of
///   characters after the (optional) second prefix differs from the digit
///   count of the start number, or if those characters are not all digits.
SSeqIdRange::SSeqIdRange(const CTempString& s, TFlags flags)
    : start(0), stop(0), digits(0), acc_info(CSeq_id::eAcc_unknown)
{
    const bool underscores_ok = (flags & fAllowUnderscores) != 0;
    // Characters that may appear in a prefix.
    auto is_prefix_char = [underscores_ok](char c) {
        return isalpha((unsigned char) c)  ||  (underscores_ok  &&  c == '_');
    };

    size_t pos = 0, n = s.size();
    while (pos < n  &&  is_prefix_char(s[pos])) {
        prefix += s[pos++];
    }
    while (pos < n  &&  isdigit((unsigned char) s[pos])) {
        start = start * 10 + s[pos++] - '0';
        ++digits;
    }
    if (pos == n) {
        // Single accession rather than a range.
        stop = start;
        return;
    } else if (s[pos++] != '-') {
        NCBI_THROW(CSeqIdException, eFormat,
                   "Expected hyphen in range " + string(s));
    }

    {{
        // The prefix may be repeated after the hyphen, but must then match.
        string pfx2;
        while (pos < n  &&  is_prefix_char(s[pos])) {
            pfx2 += s[pos++];
        }
        if ( !pfx2.empty()  &&  pfx2 != prefix) {
            NCBI_THROW(CSeqIdException, eFormat,
                       "Mismatched prefixes in range " + string(s));
        }
    }}
    if (pos + digits != n) {
        NCBI_THROW(CSeqIdException, eFormat,
                   "Mismatched digit counts in range " + string(s));
    }
    while (pos < n  &&  isdigit((unsigned char) s[pos])) {
        stop = stop * 10 + s[pos++] - '0';
    }
    if (pos != n) {
        // The length check above only counts characters; without this test
        // trailing non-digits would be silently dropped and stop would hold
        // a truncated value.
        NCBI_THROW(CSeqIdException, eFormat,
                   "Unexpected non-digit characters in range " + string(s));
    }
}


/// Build a CSeq_id for the iterator's current value.
///
/// The accession info for the whole range is determined on first use and
/// stored in the range's acc_info; later calls reuse the stored value.  When
/// the range holds more than one element and uses five digits, certain
/// prefixes have the stored info replaced by an "unreserved" value (see the
/// comments in the body).  If the resulting accession type is e_not_set the
/// id is constructed from the accession text alone; otherwise the type is
/// passed to the CSeq_id constructor explicitly.
///
/// @return
///   A newly allocated CSeq_id for the current value.
CRef<CSeq_id> SSeqIdRange::const_iterator::GetID(void) const
{
    static const CSeq_id::TParseFlags kIdentifyFlags
        = CSeq_id::fParse_AnyRaw | CSeq_id::fParse_FallbackOK;

    if (m_Range->acc_info == CSeq_id::eAcc_unknown) {
        m_Range->acc_info
            = CSeq_id::IdentifyAccession(**this, kIdentifyFlags);
        if (m_Range->size() > 1  &&  m_Range->digits == 5) {
            // account for possible non-uniformity
            const char   lead     = m_Range->prefix[0];
            const size_t pfx_size = m_Range->prefix.size();
            const bool   lead_cd  = (lead == 'C'  ||  lead == 'D'
                                     ||  lead == 'c'  ||  lead == 'd');
            const bool   lead_n   = (lead == 'N'  ||  lead == 'n');

            if (lead_cd  &&  pfx_size == 3) {
                m_Range->acc_info = CSeq_id::eAcc_unreserved_prot;
            }
            // NOTE(review): one-letter C/D prefixes are handled together
            // with N here, mirroring a switch whose C/D case had no break
            // before the N case -- confirm that this is intended.
            if ((lead_cd  ||  lead_n)  &&  pfx_size == 1) {
                m_Range->acc_info = CSeq_id::eAcc_unreserved_nuc;
            }
        }
    }

    const CSeq_id::E_Choice type = CSeq_id::GetAccType(m_Range->acc_info);
    if (type != CSeq_id::e_not_set) {
        return CRef<CSeq_id>(new CSeq_id(type, **this));
    }
    // No usable type: construct the id from the accession text alone.
    return CRef<CSeq_id>(new CSeq_id(**this));
}


/// Rebuild the cached accession string for the current number.
///
/// The result is the range's prefix followed by m_Number, left-padded with
/// zeros to the range's digit count.
///
/// @return
///   Reference to the updated m_Accession member.
const string& SSeqIdRange::const_iterator::x_SetAccession(void) const
{
    CNcbiOstrstream formatted;
    formatted << m_Range->prefix;
    // Width applies only to the next insertion, i.e. the number itself.
    formatted << setfill('0') << setw(m_Range->digits) << m_Number;
    m_Accession = CNcbiOstrstreamToString(formatted);
    return m_Accession;
}


END_objects_SCOPE // namespace ncbi::objects::
END_NCBI_SCOPE

#undef NCBI_USE_ERRCODE_X
